feat(infra): auto-commit con 29 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-14 02:06:44 +02:00
parent 47fac22230
commit ca1bf5a59b
29 changed files with 2148 additions and 11 deletions
+19 -2
View File
@@ -12,11 +12,14 @@ import (
func cmdDoctor(args []string) {
jsonOut := false
emitClaudeMd := false
sub := ""
for _, a := range args {
switch a {
case "--json":
jsonOut = true
case "--emit-claude-md":
emitClaudeMd = true
case "-h", "--help":
doctorUsage()
return
@@ -51,7 +54,11 @@ func cmdDoctor(args []string) {
case "copied-code":
doctorCopiedCode(r, jsonOut)
case "capabilities":
doctorCapabilities(r, jsonOut)
if emitClaudeMd {
doctorCapabilitiesEmitMd(r)
} else {
doctorCapabilities(r, jsonOut)
}
default:
fmt.Fprintf(os.Stderr, "unknown doctor subcommand: %s\n", sub)
doctorUsage()
@@ -79,7 +86,8 @@ Subcommands:
capabilities Drift entre docs/capabilities/INDEX.md, tags de funciones, y paginas <grupo>.md (issue 0086)
Flags:
--json Salida JSON (para scripting/agentes)`)
--json Salida JSON (para scripting/agentes)
--emit-claude-md (solo capabilities) Genera bloque markdown para CLAUDE.md`)
}
func doctorAll(root string, jsonOut bool) {
@@ -432,6 +440,15 @@ func doctorCapabilities(root string, jsonOut bool) {
fmt.Printf("\n%d/%d capability groups healthy.\n", len(audits)-bad, len(audits))
}
// doctorCapabilitiesEmitMd writes to stdout whatever infra.RenderCapabilitiesMd
// produces for the value returned by infra.EmitCapabilitiesMd(root).
// If EmitCapabilitiesMd fails, the error is reported on stderr and the
// process exits with status 1.
func doctorCapabilitiesEmitMd(root string) {
	emitted, emitErr := infra.EmitCapabilitiesMd(root)
	if emitErr == nil {
		fmt.Print(infra.RenderCapabilitiesMd(emitted))
		return
	}
	fmt.Fprintf(os.Stderr, "error: %v\n", emitErr)
	os.Exit(1)
}
func doctorCopiedCode(root string, jsonOut bool) {
entries, err := infra.AuditCopiedCode(root)
if err != nil {
+5 -1
View File
@@ -49,6 +49,8 @@ func main() {
cmdVault(os.Args[2:])
case "doctor":
cmdDoctor(os.Args[2:])
case "match":
cmdMatch(os.Args[2:])
case "help", "-h", "--help":
printUsage()
default:
@@ -77,7 +79,9 @@ Usage:
fn sync [status|locations] Sincroniza con servidor central
fn vault <list|search|index|info> Gestiona y busca en data vaults
fn doctor [artefacts|services|sync|uses-functions|unused] [--json]
Diagnostico read-only del registry`)
Diagnostico read-only del registry
fn match [--top N] [--format json|text] [--min-score F] "<cmd>"
Fuzzy match entre comando shell y funciones del registry`)
}
func root() string {
+553
View File
@@ -0,0 +1,553 @@
package main
import (
"database/sql"
"encoding/json"
"fmt"
"math"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
"unicode"
_ "github.com/mattn/go-sqlite3"
)
// matchResult is one ranked candidate function produced by runMatch.
// Only ID, Score, RawScore, Signature and Snippet are serialized to JSON;
// the remaining fields carry the `json:"-"` tag and stay internal.
type matchResult struct {
ID string `json:"id"`
Score float64 `json:"score"` // normalized by runMatch so the top hit is 1.0 (rounded to 3 decimals)
RawScore float64 `json:"raw_score"` // absolute score before normalization; use it for confidence gates
Signature string `json:"signature"`
Snippet string `json:"snippet"` // description shortened by snippet()
Lang string `json:"-"`
Name string `json:"-"`
Tags string `json:"-"`
HighConfidence bool `json:"-"` // NOTE(review): no code visible in this file assigns this field — confirm it is still needed
}
// matchOutput is the JSON envelope printed by `fn match` in JSON format.
type matchOutput struct {
// Query is the command text that was matched.
Query string `json:"query"`
// Top holds the ranked hits; printMatchJSON replaces nil with an empty
// slice so it encodes as [] rather than null.
Top []matchResult `json:"top"`
// HighConfidence is the flag computed by cmdMatch from the raw scores.
HighConfidence bool `json:"high_confidence"`
}
// fts5Row is the scoring input for one candidate function. runMatch fills
// the text fields from the functions table (fnDetailQuery) and rank from
// the bm25 value returned by the FTS query (ftsOnlyQuery).
type fts5Row struct {
id string
name string
lang string
signature string
description string
tags string
rank float64 // bm25 rank as returned by SQLite
}
// --- tokenizer ---------------------------------------------------------
// Patterns used by tokenize; compiled once at package scope.
var (
// reNonAlnum matches runs of characters other than ASCII letters and digits.
reNonAlnum = regexp.MustCompile(`[^a-zA-Z0-9]+`)
// reFlag matches dash-style flags: one or two dashes followed by a letter.
reFlag = regexp.MustCompile(`^-{1,2}[a-zA-Z]`)
// reAbsPath matches the start of an absolute path: a leading slash, a
// drive prefix (letter, colon, backslash) or a double-backslash UNC prefix.
reAbsPath = regexp.MustCompile(`^(/|[A-Za-z]:\\|\\\\)`)
// rePureNumber matches strings made only of digits.
rePureNumber = regexp.MustCompile(`^\d+$`)
)
// domainStopwords are tokens so generic in this codebase that they add noise
// rather than signal to the matcher (they match hundreds of functions equally).
// tokenize drops any lowercase sub-token that is a key of this set. All keys
// are lowercase because the lookup happens after lowercasing.
var domainStopwords = map[string]bool{
"registry": true, "function": true, "functions": true,
"app": true, "apps": true, "file": true, "files": true,
"get": true, "set": true, "run": true, "list": true, "add": true,
"new": true, "all": true, "the": true, "and": true, "for": true,
"use": true, "fmt": true, "log": true, "err": true, "nil": true,
"true": true, "false": true, "var": true, "val": true, "str": true,
"tmp": true, "out": true, "src": true, "dst": true, "opt": true,
"usr": true, "etc": true, "bin": true, "lib": true, "mnt": true,
"home": true, "root": true, "host": true, "user": true, "name": true,
"path": true, "type": true, "data": true, "info": true, "init": true,
"main": true, "test": true, "util": true, "base": true, "core": true,
"api": true, "url": true, "uri": true, "http": true, "html": true,
"json": true, "yaml": true, "toml": true, "conf": true, "config": true,
"dir": true, "map": true, "key": true, "obj": true,
"ctx": true, "pkg": true, "mod": true, "cmd": true, "cli": true,
"help": true, "read": true, "open": true, "close": true, "stop": true,
"start": true, "end": true, "begin": true, "done": true, "make": true,
"build": true, "check": true, "scan": true, "load": true, "save": true,
"send": true, "recv": true, "show": true, "print": true, "write": true,
"create": true, "update": true, "delete": true, "remove": true,
"desktop": true, "lucas": true, "windows": true, "linux": true,
}
// tokenize splits a shell command into significant lowercase tokens.
//
// Processing per whitespace-separated word:
//   - dash flags (-v, --port) and Windows-style switches (/F, /IM) are dropped;
//   - words containing a path separator are reduced to their basename; the
//     extension is removed from the basename and, when it has at least three
//     characters, emitted as its own token (it is not checked against the
//     stopword list);
//   - the remainder is split on non-alphanumeric characters, and pieces that
//     are shorter than three characters, purely numeric, already seen, or
//     listed in domainStopwords are discarded.
//
// Tokens are returned in first-seen order without duplicates; the result is
// nil when nothing significant remains.
func tokenize(cmd string) []string {
	// Replace common shell operators with spaces so they act as separators.
	cmd = strings.NewReplacer("|", " ", ";", " ", "&&", " ", "||", " ",
		"(", " ", ")", " ", "{", " ", "}", " ").Replace(cmd)

	seen := map[string]bool{}
	var tokens []string
	for _, p := range strings.Fields(cmd) {
		// Dash-style flags carry no matching signal.
		if reFlag.MatchString(p) {
			continue
		}
		// A slash-prefixed word with no further slash is treated as a
		// Windows switch (/F, /IM). Words with more slashes are Unix absolute
		// paths and must fall through so their basename is kept; skipping
		// every "/..." word would silently drop them.
		if len(p) > 1 && p[0] == '/' && !strings.Contains(p[1:], "/") {
			continue
		}

		// Paths: keep only the basename without its extension.
		if reAbsPath.MatchString(p) || strings.ContainsAny(p, "/\\") {
			p = filepath.Base(p)
			if ext := filepath.Ext(p); ext != "" {
				p = strings.TrimSuffix(p, ext)
				// The extension (without the dot) is a token of its own.
				extTok := strings.ToLower(strings.TrimPrefix(ext, "."))
				if len(extTok) >= 3 && !seen[extTok] {
					seen[extTok] = true
					tokens = append(tokens, extTok)
				}
			}
		}

		// Split what is left on non-alphanumeric characters and filter.
		for _, sp := range reNonAlnum.Split(p, -1) {
			tok := strings.ToLower(sp)
			if len(tok) < 3 || rePureNumber.MatchString(tok) || seen[tok] || domainStopwords[tok] {
				continue
			}
			seen[tok] = true
			tokens = append(tokens, tok)
		}
	}
	return tokens
}
// buildFTSQuery joins tokens into a single FTS5 MATCH expression where the
// terms are alternatives (separated by " OR "). A token containing any of the
// characters " ' ( ) ^ * : - . is emitted as a double-quoted string, with
// embedded double quotes doubled. An empty token list yields "".
func buildFTSQuery(tokens []string) string {
	const fts5Special = `"'()^*:-.`

	terms := make([]string, len(tokens))
	for i, term := range tokens {
		if strings.ContainsAny(term, fts5Special) {
			term = `"` + strings.ReplaceAll(term, `"`, `""`) + `"`
		}
		terms[i] = term
	}
	return strings.Join(terms, " OR ")
}
// --- language penalty heuristics ---------------------------------------
// pythonMarkers are tokens that strongly suggest Python code. hasPythonMarkers
// looks query tokens up in this set; when one is present scoreHit halves the
// score of candidates whose language is "bash".
var pythonMarkers = map[string]bool{
"def": true, "import": true, "class": true, "elif": true,
"self": true, "lambda": true, "yield": true, "async": true,
"await": true, "with": true,
}
// bashMarkers are tokens that strongly suggest a shell command line. Besides
// Unix tools the set also lists Windows command names and the "exe"/"bat"
// extensions. hasBashMarkers looks query tokens up in this set; when one is
// present scoreHit halves the score of candidates whose language is "py".
var bashMarkers = map[string]bool{
"chmod": true, "chown": true, "grep": true, "awk": true,
"sed": true, "curl": true, "wget": true, "ssh": true,
"rsync": true, "systemctl": true, "apt": true, "yum": true,
"taskkill": true, "cmd": true, "powershell": true,
"exe": true, "bat": true,
}
// hasPythonMarkers reports whether at least one token is a key of
// pythonMarkers with a true value.
func hasPythonMarkers(tokens []string) bool {
	for i := range tokens {
		if isMarker := pythonMarkers[tokens[i]]; isMarker {
			return true
		}
	}
	return false
}
// hasBashMarkers reports whether at least one token is a key of bashMarkers
// with a true value.
func hasBashMarkers(tokens []string) bool {
	for i := range tokens {
		if isMarker := bashMarkers[tokens[i]]; isMarker {
			return true
		}
	}
	return false
}
// --- scoring -----------------------------------------------------------
// scoreHit computes the raw (un-normalized) score of one candidate.
//
// The score is base + boost, optionally halved by a language penalty:
//   - base is 1/(1+|rank|), which maps the bm25 rank reported by SQLite
//     (negative, more negative = better) into (0, 1];
//   - boost adds, for every query token, the bonus of the best field whose
//     lowercased text contains the token: name=3.0, tags=2.0, signature=1.5,
//     description=1.0, nothing otherwise. Bonuses are summed across tokens,
//     never multiplied, which avoids runaway values when many tokens match;
//   - the total is multiplied by 0.5 when the query looks like Python
//     (hasPython) and the candidate language is "bash", or when it looks like
//     shell (hasBash) and the candidate language is "py".
//
// The result is not clamped; the caller normalizes across candidates.
func scoreHit(row fts5Row, tokens []string, hasPython, hasBash bool) float64 {
	name := strings.ToLower(row.name)
	tags := strings.ToLower(row.tags)
	sig := strings.ToLower(row.signature)
	desc := strings.ToLower(row.description)

	boost := 0.0
	for _, tok := range tokens {
		// Cases are ordered from highest to lowest bonus, so the first hit
		// is the best-field bonus for this token.
		switch {
		case strings.Contains(name, tok):
			boost += 3.0
		case strings.Contains(tags, tok):
			boost += 2.0
		case strings.Contains(sig, tok):
			boost += 1.5
		case strings.Contains(desc, tok):
			boost += 1.0
		}
	}

	total := 1.0/(1.0+math.Abs(row.rank)) + boost

	// Language mismatch penalty, applied once to the total.
	lang := strings.ToLower(row.lang)
	if (hasPython && lang == "bash") || (hasBash && lang == "py") {
		return total * 0.5
	}
	return total
}
// snippet returns a single-line preview of description that is at most
// maxLen bytes long, plus a trailing "..." when text was cut off.
//
// Control characters other than tab are replaced by spaces and surrounding
// whitespace is trimmed. When truncation is needed the cut never splits a
// multi-byte UTF-8 character, and it moves back to the last space if that
// space lies in the second half of the allowed length, so words are not
// chopped when avoidable. A negative maxLen is treated as 0.
func snippet(description string, maxLen int) string {
	description = strings.Map(func(r rune) rune {
		if unicode.IsControl(r) && r != '\t' {
			return ' '
		}
		return r
	}, description)
	description = strings.TrimSpace(description)

	if maxLen < 0 {
		maxLen = 0
	}
	if len(description) <= maxLen {
		return description
	}

	// Back up while the byte at the cut position is a UTF-8 continuation
	// byte (10xxxxxx); cutting there would leave a truncated character.
	// strings.Map above guarantees the string is valid UTF-8.
	end := maxLen
	for end > 0 && description[end]&0xC0 == 0x80 {
		end--
	}

	cut := description[:end]
	// Prefer ending on a word boundary, but only if that keeps more than
	// half of the allowed length.
	if idx := strings.LastIndex(cut, " "); idx > maxLen/2 {
		cut = cut[:idx]
	}
	return cut + "..."
}
// --- FTS5 query --------------------------------------------------------
// ftsOnlyQuery selects id and bm25 rank from the functions_fts virtual table
// for rows matching the bound MATCH expression, best rank first, capped at
// 50 rows. It takes one parameter: the FTS5 query string.
// bm25() must be used without JOIN — it only works in direct FTS queries.
const ftsOnlyQuery = `
SELECT id, bm25(functions_fts) AS rank
FROM functions_fts
WHERE functions_fts MATCH ?
ORDER BY rank
LIMIT 50
`
// fnDetailQuery fetches metadata for a list of IDs from the functions table.
// It is a fmt template: the single %s must be replaced with a comma-separated
// list of "?" placeholders, one per ID bound at query time. A NULL tags
// column is returned as the string '[]'.
const fnDetailQuery = `
SELECT id, name, lang, signature, description, COALESCE(tags, '[]')
FROM functions
WHERE id IN (%s)
`
// runMatch ranks registry functions against a shell command.
//
// It tokenizes query, runs an FTS5 OR-query over those tokens to obtain
// candidate ids with their bm25 rank, loads the candidates' metadata from the
// functions table, scores each with scoreHit and returns them sorted by
// score, best first (ties broken by ID so the order is deterministic).
//
// In every returned hit RawScore is the absolute scoreHit value and Score is
// that value divided by the best raw score and rounded to three decimals
// (left un-normalized if the best raw score is not positive). Hits whose
// normalized Score is below minScore are dropped and at most topN hits are
// returned (a negative topN is treated as 0).
//
// It returns (nil, nil) when the FTS query matches nothing, and an error when
// no significant token can be extracted from query or when a database
// operation fails. Individual rows that cannot be scanned are skipped.
func runMatch(dbPath string, query string, topN int, minScore float64) ([]matchResult, error) {
	tokens := tokenize(query)
	if len(tokens) == 0 {
		return nil, fmt.Errorf("no significant tokens extracted from: %q", query)
	}
	ftsQ := buildFTSQuery(tokens)
	if ftsQ == "" {
		return nil, fmt.Errorf("could not build FTS query")
	}

	// Open normally (not strict read-only) so WAL frames are visible.
	// bm25() with mode=ro fails with "missing row from content table" when
	// the WAL has not been checkpointed — the FTS index references rows that
	// aren't in the main db file yet. We never write anything here.
	conn, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return nil, fmt.Errorf("opening db: %w", err)
	}
	defer conn.Close()

	// Step 1: FTS-only query to get ids + bm25 ranks (no JOIN).
	ftsRows, err := conn.Query(ftsOnlyQuery, ftsQ)
	if err != nil {
		return nil, fmt.Errorf("fts query: %w", err)
	}
	rankMap := make(map[string]float64)
	var args []any
	for ftsRows.Next() {
		var (
			id   string
			rank float64
		)
		if err := ftsRows.Scan(&id, &rank); err != nil {
			continue // best effort: skip a row that cannot be decoded
		}
		rankMap[id] = rank
		args = append(args, id)
	}
	// A failure during iteration would otherwise look like "fewer matches".
	ftsErr := ftsRows.Err()
	ftsRows.Close()
	if ftsErr != nil {
		return nil, fmt.Errorf("reading fts rows: %w", ftsErr)
	}
	if len(args) == 0 {
		return nil, nil
	}

	// Step 2: fetch metadata for those ids with a regular SELECT, binding
	// one "?" placeholder per id.
	placeholders := strings.TrimSuffix(strings.Repeat("?,", len(args)), ",")
	detailRows, err := conn.Query(fmt.Sprintf(fnDetailQuery, placeholders), args...)
	if err != nil {
		return nil, fmt.Errorf("detail query: %w", err)
	}
	defer detailRows.Close()

	hasPython := hasPythonMarkers(tokens)
	hasBash := hasBashMarkers(tokens)
	results := make([]matchResult, 0, len(args))
	for detailRows.Next() {
		var r fts5Row
		if err := detailRows.Scan(&r.id, &r.name, &r.lang, &r.signature, &r.description, &r.tags); err != nil {
			continue // best effort: skip a row that cannot be decoded
		}
		r.rank = rankMap[r.id]
		results = append(results, matchResult{
			ID:        r.id,
			Score:     scoreHit(r, tokens, hasPython, hasBash), // normalized below
			Signature: r.signature,
			Snippet:   snippet(r.description, 120),
			Lang:      r.lang,
			Name:      r.name,
			Tags:      r.tags,
		})
	}
	if err := detailRows.Err(); err != nil {
		return nil, fmt.Errorf("reading detail rows: %w", err)
	}

	// Sort by score descending. The detail query returns rows in no
	// particular order, so ties are broken by ID to keep output stable.
	sort.Slice(results, func(i, j int) bool {
		if results[i].Score != results[j].Score {
			return results[i].Score > results[j].Score
		}
		return results[i].ID < results[j].ID
	})

	// Keep the absolute score BEFORE normalizing: it is what absolute
	// confidence gates need. Normalization hides weak queries whose top hit
	// is merely the "least bad" one, because the top is always mapped to 1.0.
	for i := range results {
		results[i].RawScore = results[i].Score
	}

	// Normalise scores so the top result is 1.0 and the rest are relative.
	// This makes the output stable and meaningful regardless of token count.
	if len(results) > 0 && results[0].Score > 0 {
		maxScore := results[0].Score
		for i := range results {
			results[i].Score = math.Round((results[i].Score/maxScore)*1000) / 1000
		}
	}

	// Filter by min score (on the normalized value).
	var filtered []matchResult
	for _, r := range results {
		if r.Score >= minScore {
			filtered = append(filtered, r)
		}
	}

	// Limit to topN; guard against a negative limit, which would panic.
	if topN < 0 {
		topN = 0
	}
	if len(filtered) > topN {
		filtered = filtered[:topN]
	}
	return filtered, nil
}
// --- command -----------------------------------------------------------
// cmdMatch implements `fn match`: it parses the flags, takes the command to
// match from the positional argument (the last one wins) or, when none is
// given and stdin is not a terminal, from stdin, runs runMatch against the
// registry database and prints the hits as JSON (default) or text.
//
// Invalid or missing values for --top and --min-score produce a warning on
// stderr and leave the default in place. Unknown flags are ignored. The
// process exits with status 1 when no command is provided or the match fails.
func cmdMatch(args []string) {
	topN := 3
	format := "json"
	minScore := 0.3
	var queryArg string

	for i := 0; i < len(args); i++ {
		switch args[i] {
		case "--top", "-n":
			i++
			if i >= len(args) {
				fmt.Fprintf(os.Stderr, "fn match: --top needs a value, using %d\n", topN)
			} else if n, err := strconv.Atoi(args[i]); err == nil && n > 0 {
				topN = n
			} else {
				fmt.Fprintf(os.Stderr, "fn match: invalid --top value %q, using %d\n", args[i], topN)
			}
		case "--format", "-f":
			i++
			if i < len(args) {
				format = args[i]
			}
		case "--min-score":
			i++
			if i >= len(args) {
				fmt.Fprintf(os.Stderr, "fn match: --min-score needs a value, using %g\n", minScore)
			} else if f, err := strconv.ParseFloat(args[i], 64); err == nil {
				minScore = f
			} else {
				fmt.Fprintf(os.Stderr, "fn match: invalid --min-score value %q, using %g\n", args[i], minScore)
			}
		case "--help", "-h":
			fmt.Println(`fn match — fuzzy matcher between a shell command and registry functions
Usage:
fn match [--top N] [--format json|text] [--min-score F] "<command>"
echo "<command>" | fn match [--top N] [--format json|text] [--min-score F]
Flags:
--top N Return top N results (default: 3)
--format Output format: json (default) or text
--min-score F Minimum score threshold 0..1 (default: 0.3)
Example:
fn match "taskkill.exe /IM registry_dashboard.exe /F"
fn match --top 5 --format text "curl -sf https://api.example.com/health"
echo "rsync -avz --exclude .git src/ user@host:/opt/app" | fn match`)
			return
		default:
			if !strings.HasPrefix(args[i], "-") {
				queryArg = args[i]
			}
		}
	}

	// Try stdin if no positional arg, but only when stdin is not a terminal
	// (i.e. something was piped or redirected in).
	if queryArg == "" {
		stat, err := os.Stdin.Stat()
		if err == nil && (stat.Mode()&os.ModeCharDevice) == 0 {
			var sb strings.Builder
			buf := make([]byte, 4096)
			for {
				n, err := os.Stdin.Read(buf)
				if n > 0 {
					sb.Write(buf[:n])
				}
				if err != nil {
					break // io.EOF or a read failure: use what was read so far
				}
			}
			queryArg = strings.TrimSpace(sb.String())
		}
	}
	if queryArg == "" {
		fmt.Fprintln(os.Stderr, "fn match: no command provided. Use --help for usage.")
		os.Exit(1)
	}

	// Always ask for at least two hits: the confidence gate below compares
	// the best hit with the runner-up, which --top 1 would otherwise hide.
	fetchN := topN
	if fetchN < 2 {
		fetchN = 2
	}
	dbPath := filepath.Join(root(), dbName)
	hits, err := runMatch(dbPath, queryArg, fetchN, minScore)
	if err != nil {
		fmt.Fprintf(os.Stderr, "fn match: %v\n", err)
		os.Exit(1)
	}

	// Compute the high_confidence flag with a double gate on RAW scores:
	//  1. The top raw score must reach minRawForHighConf. Normalized scores
	//     cannot be used here because the top hit is always 1.0 after
	//     normalization, even for queries that match nothing well.
	//  2. The top raw score must be more than 1.5 times the runner-up's, so
	//     the top hit stands out instead of leading a cluster of mediocre
	//     matches. With no runner-up above the threshold the first gate
	//     alone decides.
	const minRawForHighConf = 4.0
	highConf := false
	if len(hits) >= 1 && hits[0].RawScore >= minRawForHighConf {
		if len(hits) >= 2 && hits[1].RawScore > 0 {
			highConf = hits[0].RawScore/hits[1].RawScore > 1.5
		} else {
			highConf = true // single hit with a high raw score
		}
	}

	// Drop the extra runner-up that was fetched only for the gate.
	if len(hits) > topN {
		hits = hits[:topN]
	}

	switch format {
	case "text":
		printMatchText(queryArg, hits, highConf)
	default:
		printMatchJSON(queryArg, hits, highConf)
	}
}
// printMatchJSON writes the match result to stdout as an indented JSON
// matchOutput document. A nil hits slice is encoded as an empty array rather
// than null. If encoding or writing fails, the error is reported on stderr
// and the process exits with status 1, so consumers never mistake truncated
// output for success.
func printMatchJSON(query string, hits []matchResult, highConf bool) {
	out := matchOutput{
		Query:          query,
		Top:            hits,
		HighConfidence: highConf,
	}
	if out.Top == nil {
		out.Top = []matchResult{}
	}
	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", " ")
	if err := enc.Encode(out); err != nil {
		fmt.Fprintf(os.Stderr, "fn match: writing JSON output: %v\n", err)
		os.Exit(1)
	}
}
// printMatchText writes a human-readable report to stdout: a header line with
// the query (suffixed with a high-confidence marker when highConf is set),
// then for every hit its normalized score and ID followed by its signature
// and snippet on separate lines. With no hits a placeholder line is printed.
func printMatchText(query string, hits []matchResult, highConf bool) {
	var marker string
	if highConf {
		marker = " [HIGH CONFIDENCE]"
	}
	fmt.Printf("TOP MATCHES for: %s%s\n", query, marker)

	if len(hits) == 0 {
		fmt.Println(" (no matches above threshold)")
		return
	}
	for i := range hits {
		hit := &hits[i]
		fmt.Printf(" [%.3f] %s\n", hit.Score, hit.ID)
		fmt.Printf(" %s\n", hit.Signature)
		fmt.Printf(" %s\n", hit.Snippet)
	}
}