feat(infra): auto-commit con 29 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 02:06:44 +02:00
parent 47fac22230
commit ca1bf5a59b
29 changed files with 2148 additions and 11 deletions
@@ -12,11 +12,14 @@ import (

 func cmdDoctor(args []string) {
 	jsonOut := false
+	emitClaudeMd := false
 	sub := ""
 	for _, a := range args {
 		switch a {
 		case "--json":
 			jsonOut = true
+		case "--emit-claude-md":
+			emitClaudeMd = true
 		case "-h", "--help":
 			doctorUsage()
 			return
@@ -51,7 +54,11 @@ func cmdDoctor(args []string) {
 	case "copied-code":
 		doctorCopiedCode(r, jsonOut)
 	case "capabilities":
-		doctorCapabilities(r, jsonOut)
+		if emitClaudeMd {
+			doctorCapabilitiesEmitMd(r)
+		} else {
+			doctorCapabilities(r, jsonOut)
+		}
 	default:
 		fmt.Fprintf(os.Stderr, "unknown doctor subcommand: %s\n", sub)
 		doctorUsage()
@@ -79,7 +86,8 @@ Subcommands:
  capabilities    Drift entre docs/capabilities/INDEX.md, tags de funciones, y paginas <grupo>.md (issue 0086)

 Flags:
-  --json          Salida JSON (para scripting/agentes)`)
+  --json              Salida JSON (para scripting/agentes)
+  --emit-claude-md    (solo capabilities) Genera bloque markdown para CLAUDE.md`)
 }

 func doctorAll(root string, jsonOut bool) {
@@ -432,6 +440,15 @@ func doctorCapabilities(root string, jsonOut bool) {
 	fmt.Printf("\n%d/%d capability groups healthy.\n", len(audits)-bad, len(audits))
 }

+// doctorCapabilitiesEmitMd prints to stdout the text that
+// infra.RenderCapabilitiesMd produces from the infra.EmitCapabilitiesMd
+// result for root. cmdDoctor calls it for the "capabilities" subcommand when
+// --emit-claude-md is set. If EmitCapabilitiesMd fails, the error is written
+// to stderr and the process exits with status 1.
+func doctorCapabilitiesEmitMd(root string) {
+	emitted, emitErr := infra.EmitCapabilitiesMd(root)
+	if emitErr == nil {
+		fmt.Print(infra.RenderCapabilitiesMd(emitted))
+		return
+	}
+	fmt.Fprintf(os.Stderr, "error: %v\n", emitErr)
+	os.Exit(1)
+}
+
 func doctorCopiedCode(root string, jsonOut bool) {
 	entries, err := infra.AuditCopiedCode(root)
 	if err != nil {
@@ -49,6 +49,8 @@ func main() {
 		cmdVault(os.Args[2:])
 	case "doctor":
 		cmdDoctor(os.Args[2:])
+	case "match":
+		cmdMatch(os.Args[2:])
 	case "help", "-h", "--help":
 		printUsage()
 	default:
@@ -77,7 +79,9 @@ Usage:
  fn sync    [status|locations]            Sincroniza con servidor central
  fn vault   <list|search|index|info>      Gestiona y busca en data vaults
  fn doctor  [artefacts|services|sync|uses-functions|unused] [--json]
-                                           Diagnostico read-only del registry`)
+                                           Diagnostico read-only del registry
+  fn match   [--top N] [--format json|text] [--min-score F] "<cmd>"
+                                           Fuzzy match entre comando shell y funciones del registry`)
 }

 func root() string {
@@ -0,0 +1,553 @@
+package main
+
+import (
+	"database/sql"
+	"encoding/json"
+	"fmt"
+	"math"
+	"os"
+	"path/filepath"
+	"regexp"
+	"sort"
+	"strconv"
+	"strings"
+	"unicode"
+
+	_ "github.com/mattn/go-sqlite3"
+)
+
+// matchResult holds one candidate function match.
+//
+// Only ID, Score, RawScore, Signature and Snippet are serialised to JSON;
+// the fields tagged `json:"-"` are kept for in-process use only.
+type matchResult struct {
+	ID             string  `json:"id"`
+	Score          float64 `json:"score"`     // normalized (top=1.0)
+	RawScore       float64 `json:"raw_score"` // absolute, pre-normalization. Use for confidence gates.
+	Signature      string  `json:"signature"`
+	Snippet        string  `json:"snippet"`
+	Lang           string  `json:"-"`
+	Name           string  `json:"-"`
+	Tags           string  `json:"-"`
+	// NOTE(review): HighConfidence is not assigned anywhere in this file; the
+	// flag that is actually emitted lives on matchOutput. Confirm whether this
+	// per-result field is still needed.
+	HighConfidence bool    `json:"-"` // filled after ranking
+}
+
+// matchOutput is the JSON envelope returned by fn match.
+//
+// Query echoes the command that was matched, Top lists the ranked results,
+// and HighConfidence carries the confidence flag computed by cmdMatch.
+type matchOutput struct {
+	Query          string        `json:"query"`
+	Top            []matchResult `json:"top"`
+	HighConfidence bool          `json:"high_confidence"`
+}
+
+// fts5Row is one scoring candidate handed to scoreHit. runMatch fills the
+// metadata fields from the functions-table detail query and sets rank to the
+// bm25 value the FTS query returned for the same id.
+type fts5Row struct {
+	id          string
+	name        string
+	lang        string
+	signature   string
+	description string
+	tags        string
+	rank        float64 // bm25 rank from functions_fts
+}
+
+// --- tokenizer ---------------------------------------------------------
+
+// Patterns used by tokenize, compiled once at package initialisation.
+var (
+	// reNonAlnum matches runs of characters outside [a-zA-Z0-9]; words are split on it.
+	reNonAlnum   = regexp.MustCompile(`[^a-zA-Z0-9]+`)
+	// reFlag matches a leading "-" or "--" followed by a letter (e.g. -v, --port).
+	reFlag       = regexp.MustCompile(`^-{1,2}[a-zA-Z]`)
+	// reAbsPath matches the start of an absolute path: "/", a drive prefix
+	// such as C:\, or a leading double backslash.
+	reAbsPath    = regexp.MustCompile(`^(/|[A-Za-z]:\\|\\\\)`)
+	// rePureNumber matches strings made only of digits.
+	rePureNumber = regexp.MustCompile(`^\d+$`)
+)
+
+// domainStopwords are tokens so generic in this codebase that they add noise
+// rather than signal to the matcher (they match hundreds of functions equally).
+// tokenize discards any lowercased token found here. Keys must be lowercase,
+// because the lookup happens after lowercasing.
+// NOTE(review): entries such as "lucas" and "desktop" look specific to one
+// machine's paths — confirm they belong in a shared list.
+var domainStopwords = map[string]bool{
+	"registry": true, "function": true, "functions": true,
+	"app": true, "apps": true, "file": true, "files": true,
+	"get": true, "set": true, "run": true, "list": true, "add": true,
+	"new": true, "all": true, "the": true, "and": true, "for": true,
+	"use": true, "fmt": true, "log": true, "err": true, "nil": true,
+	"true": true, "false": true, "var": true, "val": true, "str": true,
+	"tmp": true, "out": true, "src": true, "dst": true, "opt": true,
+	"usr": true, "etc": true, "bin": true, "lib": true, "mnt": true,
+	"home": true, "root": true, "host": true, "user": true, "name": true,
+	"path": true, "type": true, "data": true, "info": true, "init": true,
+	"main": true, "test": true, "util": true, "base": true, "core": true,
+	"api":  true, "url":  true, "uri":  true, "http": true, "html": true,
+	"json": true, "yaml": true, "toml": true, "conf": true, "config": true,
+	"dir":  true, "map":  true, "key":  true, "obj":  true,
+	"ctx":  true, "pkg":  true, "mod":  true, "cmd":  true, "cli":  true,
+	"help": true, "read": true, "open": true, "close": true, "stop": true,
+	"start": true, "end": true, "begin": true, "done": true, "make": true,
+	"build": true, "check": true, "scan": true, "load": true, "save": true,
+	"send": true, "recv": true, "show": true, "print": true, "write": true,
+	"create": true, "update": true, "delete": true, "remove": true,
+	"desktop": true, "lucas": true, "windows": true, "linux": true,
+}
+
+// tokenize splits a shell command into significant lowercase tokens.
+//
+// The shell operators | ; && || ( ) { } are first turned into separators.
+// Each whitespace-separated word is then handled as follows:
+//   - option words are dropped: -v / --port style flags, and Windows-style
+//     switches such as /F or /IM (one leading slash and no other separator);
+//   - path-like words (anything containing / or \) are reduced to their
+//     basename, and the extension is split off and tokenised on its own;
+//   - what remains is split on non-alphanumeric characters and lowercased.
+//
+// Every candidate, extension included, goes through the same filter: tokens
+// shorter than 3 characters, purely numeric tokens, domainStopwords and
+// duplicates are discarded. Tokens keep their order of first appearance; for
+// a path, the extension token comes before the basename tokens.
+//
+// It returns nil when nothing significant is left.
+func tokenize(cmd string) []string {
+	// Replace common shell operators with spaces so they act as separators.
+	cmd = strings.NewReplacer("|", " ", ";", " ", "&&", " ", "||", " ",
+		"(", " ", ")", " ", "{", " ", "}", " ").Replace(cmd)
+
+	seen := map[string]bool{}
+	var tokens []string
+
+	// add splits raw on non-alphanumerics and keeps each significant piece once.
+	add := func(raw string) {
+		for _, piece := range reNonAlnum.Split(raw, -1) {
+			tok := strings.ToLower(piece)
+			if len(tok) < 3 || rePureNumber.MatchString(tok) || seen[tok] || domainStopwords[tok] {
+				continue
+			}
+			seen[tok] = true
+			tokens = append(tokens, tok)
+		}
+	}
+
+	for _, word := range strings.Fields(cmd) {
+		if reFlag.MatchString(word) {
+			continue
+		}
+		// "/F" and "/IM" are Windows switches. A word with further separators
+		// ("/opt/tools/deploy.sh") is an absolute path and must reach the
+		// basename handling below instead of being thrown away.
+		if len(word) > 1 && word[0] == '/' && !strings.ContainsAny(word[1:], `/\`) {
+			continue
+		}
+		if reAbsPath.MatchString(word) || strings.ContainsAny(word, `/\`) {
+			// The basename is cut by hand on both separators: filepath.Base
+			// only honours the host OS separator, so on Linux the directories
+			// of a Windows path would leak into the token list.
+			word = strings.TrimRight(word, `/\`)
+			if i := strings.LastIndexAny(word, `/\`); i >= 0 {
+				word = word[i+1:]
+			}
+			if ext := filepath.Ext(word); ext != "" {
+				word = strings.TrimSuffix(word, ext)
+				// The extension goes through the same filter as any other
+				// token, so ".c++" or ".json" cannot inject FTS5 syntax or
+				// stopwords into the query.
+				add(ext)
+			}
+		}
+		add(word)
+	}
+	return tokens
+}
+
+// buildFTSQuery constructs a safe FTS5 OR query from tokens.
+//
+// A token is emitted as-is only when it is a valid FTS5 bareword (ASCII
+// letters, digits, underscore, or non-ASCII characters) and is not one of the
+// operator keywords AND, OR, NOT, NEAR. Any other token is wrapped in double
+// quotes with embedded quotes doubled. Empty tokens are skipped so they
+// cannot produce a dangling "OR".
+//
+// It returns "" when no usable token remains.
+func buildFTSQuery(tokens []string) string {
+	parts := make([]string, 0, len(tokens))
+	for _, tok := range tokens {
+		if tok == "" {
+			continue
+		}
+		if ftsNeedsQuoting(tok) {
+			// Inside an FTS5 string a double quote is escaped by doubling it.
+			tok = `"` + strings.ReplaceAll(tok, `"`, `""`) + `"`
+		}
+		parts = append(parts, tok)
+	}
+	return strings.Join(parts, " OR ")
+}
+
+// ftsNeedsQuoting reports whether tok cannot be written as an FTS5 bareword.
+func ftsNeedsQuoting(tok string) bool {
+	switch tok {
+	case "AND", "OR", "NOT", "NEAR":
+		// Unquoted, these would be parsed as query operators.
+		return true
+	}
+	for _, r := range tok {
+		bare := r == '_' || r >= 0x80 ||
+			(r >= '0' && r <= '9') ||
+			(r >= 'a' && r <= 'z') ||
+			(r >= 'A' && r <= 'Z')
+		if !bare {
+			return true
+		}
+	}
+	return false
+}
+
+// --- language penalty heuristics ---------------------------------------
+
+// pythonMarkers are tokens that strongly suggest Python code.
+// hasPythonMarkers looks query tokens up here; a hit makes scoreHit halve the
+// score of candidates whose lang is "bash".
+var pythonMarkers = map[string]bool{
+	"def": true, "import": true, "class": true, "elif": true,
+	"self": true, "lambda": true, "yield": true, "async": true,
+	"await": true, "with": true,
+}
+
+// bashMarkers are tokens that strongly suggest Bash code.
+// hasBashMarkers looks query tokens up here; a hit makes scoreHit halve the
+// score of candidates whose lang is "py".
+// NOTE(review): "cmd" is also listed in domainStopwords, so tokenize never
+// emits it and this entry cannot fire for tokens that come from tokenize.
+var bashMarkers = map[string]bool{
+	"chmod": true, "chown": true, "grep": true, "awk": true,
+	"sed": true, "curl": true, "wget": true, "ssh": true,
+	"rsync": true, "systemctl": true, "apt": true, "yum": true,
+	"taskkill": true, "cmd": true, "powershell": true,
+	"exe": true, "bat": true,
+}
+
+// hasPythonMarkers reports whether at least one of tokens is listed in
+// pythonMarkers. An empty or nil slice yields false.
+func hasPythonMarkers(tokens []string) bool {
+	found := false
+	for i := 0; i < len(tokens) && !found; i++ {
+		found = pythonMarkers[tokens[i]]
+	}
+	return found
+}
+
+// hasBashMarkers reports whether at least one of tokens is listed in
+// bashMarkers. An empty or nil slice yields false.
+func hasBashMarkers(tokens []string) bool {
+	found := false
+	for i := 0; i < len(tokens) && !found; i++ {
+		found = bashMarkers[tokens[i]]
+	}
+	return found
+}
+
+// --- scoring -----------------------------------------------------------
+
+// scoreHit computes the composite, un-normalised score of one candidate.
+//
+// The score is (base + bonus) * penalty, where:
+//   - base is 1/(1+|rank|), which maps the bm25 rank (negative in SQLite,
+//     more negative meaning a better match) into (0, 1];
+//   - bonus adds, for every query token, the value of the best field that
+//     contains the token as a substring (case-insensitive): name 3.0,
+//     tags 2.0, signature 1.5, description 1.0, otherwise 0;
+//   - penalty is 0.5 when the query looks like Python and the candidate's
+//     lang is "bash", or looks like Bash and the lang is "py"; otherwise 1.
+//
+// Bonuses are summed across tokens rather than multiplied, and the result is
+// not clamped; the caller normalises scores relative to the best hit.
+func scoreHit(row fts5Row, tokens []string, hasPython, hasBash bool) float64 {
+	name := strings.ToLower(row.name)
+	tags := strings.ToLower(row.tags)
+	sig := strings.ToLower(row.signature)
+	desc := strings.ToLower(row.description)
+
+	// fieldBonus returns the bonus of the highest-valued field containing tok.
+	fieldBonus := func(tok string) float64 {
+		switch {
+		case strings.Contains(name, tok):
+			return 3.0
+		case strings.Contains(tags, tok):
+			return 2.0
+		case strings.Contains(sig, tok):
+			return 1.5
+		case strings.Contains(desc, tok):
+			return 1.0
+		}
+		return 0.0
+	}
+
+	bonus := 0.0
+	for i := range tokens {
+		bonus += fieldBonus(tokens[i])
+	}
+
+	// The language mismatch penalty scales the whole score once.
+	factor := 1.0
+	lang := strings.ToLower(row.lang)
+	if (hasPython && lang == "bash") || (hasBash && lang == "py") {
+		factor = 0.5
+	}
+
+	base := 1.0 / (1.0 + math.Abs(row.rank))
+	return (base + bonus) * factor
+}
+
+// snippet returns a single-line excerpt of description that is at most
+// maxLen bytes long, not counting the "..." suffix added when text is cut.
+//
+// Control characters other than tab are replaced by spaces and the result is
+// trimmed. If the text fits in maxLen bytes it is returned unchanged.
+// Otherwise it is cut at the last rune boundary at or before maxLen, so a
+// multi-byte UTF-8 character is never split. The cut is then moved back to the
+// last space when that space lies past maxLen/2, and "..." is appended.
+// A negative maxLen is treated as 0.
+func snippet(description string, maxLen int) string {
+	description = strings.Map(func(r rune) rune {
+		if unicode.IsControl(r) && r != '\t' {
+			return ' '
+		}
+		return r
+	}, description)
+	description = strings.TrimSpace(description)
+	if maxLen < 0 {
+		maxLen = 0
+	}
+	if len(description) <= maxLen {
+		return description
+	}
+
+	// Ranging over a string yields the byte offset of every rune start, so
+	// the last offset not exceeding maxLen is a safe place to cut.
+	end := 0
+	for i := range description {
+		if i > maxLen {
+			break
+		}
+		end = i
+	}
+	cut := description[:end]
+
+	// Prefer a word boundary, unless that would discard more than half.
+	if idx := strings.LastIndex(cut, " "); idx > maxLen/2 {
+		cut = cut[:idx]
+	}
+	return cut + "..."
+}
+
+// --- FTS5 query --------------------------------------------------------
+
+// ftsOnlyQuery returns id + rank from the FTS virtual table only.
+// bm25() must be used without JOIN — it only works in direct FTS queries.
+// The single parameter is the FTS5 MATCH expression. Rows are ordered by
+// rank ascending and capped at 50 candidates.
+const ftsOnlyQuery = `
+SELECT id, bm25(functions_fts) AS rank
+FROM functions_fts
+WHERE functions_fts MATCH ?
+ORDER BY rank
+LIMIT 50
+`
+
+// fnDetailQuery fetches metadata for a list of IDs.
+// It is a format string: runMatch replaces %s with a comma-separated list of
+// "?" placeholders, one per id, and passes the ids as bound arguments.
+// A NULL tags column is returned as the literal '[]'.
+const fnDetailQuery = `
+SELECT id, name, lang, signature, description, COALESCE(tags, '[]')
+FROM functions
+WHERE id IN (%s)
+`
+
+// runMatch finds the registry functions that best match a shell command.
+//
+// Steps:
+//  1. tokenize the command and build an FTS5 OR query from the tokens;
+//  2. query functions_fts alone for candidate ids and their bm25 ranks;
+//  3. fetch the metadata of those ids from the functions table;
+//  4. score every candidate with scoreHit and sort best first, breaking
+//     ties by id so the order is deterministic;
+//  5. keep the absolute score in RawScore, normalise Score so the best hit
+//     is 1.0, drop hits below minScore and keep at most topN.
+//
+// Parameters:
+//   - dbPath: location of the SQLite registry database.
+//   - query: the raw shell command.
+//   - topN: maximum number of results; a negative value disables the cap.
+//   - minScore: minimum normalised score for a hit to be kept.
+//
+// It returns (nil, nil) when the FTS query yields no candidates. It returns
+// an error when the command has no significant tokens or a database
+// operation fails (open, query, scan, iteration).
+func runMatch(dbPath string, query string, topN int, minScore float64) ([]matchResult, error) {
+	tokens := tokenize(query)
+	if len(tokens) == 0 {
+		return nil, fmt.Errorf("no significant tokens extracted from: %q", query)
+	}
+
+	ftsQ := buildFTSQuery(tokens)
+	if ftsQ == "" {
+		return nil, fmt.Errorf("could not build FTS query")
+	}
+
+	// Open normally (not strict read-only) so WAL frames are visible.
+	// bm25() with mode=ro fails with "missing row from content table" when
+	// the WAL has not been checkpointed — the FTS index references rows that
+	// aren't in the main db file yet. We never write anything here.
+	conn, err := sql.Open("sqlite3", dbPath)
+	if err != nil {
+		return nil, fmt.Errorf("opening db: %w", err)
+	}
+	defer conn.Close()
+
+	// Step 1: FTS-only query to get ids + bm25 ranks (no JOIN).
+	ftsRows, err := conn.Query(ftsOnlyQuery, ftsQ)
+	if err != nil {
+		return nil, fmt.Errorf("fts query: %w", err)
+	}
+	rankByID := make(map[string]float64)
+	var args []any
+	for ftsRows.Next() {
+		var (
+			id   string
+			rank float64
+		)
+		if err := ftsRows.Scan(&id, &rank); err != nil {
+			ftsRows.Close()
+			return nil, fmt.Errorf("scanning fts row: %w", err)
+		}
+		rankByID[id] = rank
+		args = append(args, id)
+	}
+	// Next returning false may mean an iteration error, not just end of data.
+	err = ftsRows.Err()
+	// Closed explicitly, before the second query, rather than deferred.
+	ftsRows.Close()
+	if err != nil {
+		return nil, fmt.Errorf("reading fts rows: %w", err)
+	}
+	if len(args) == 0 {
+		return nil, nil
+	}
+
+	// Step 2: fetch metadata for those ids with a regular SELECT.
+	placeholders := strings.TrimSuffix(strings.Repeat("?,", len(args)), ",")
+	detailRows, err := conn.Query(fmt.Sprintf(fnDetailQuery, placeholders), args...)
+	if err != nil {
+		return nil, fmt.Errorf("detail query: %w", err)
+	}
+	defer detailRows.Close()
+
+	hasPython := hasPythonMarkers(tokens)
+	hasBash := hasBashMarkers(tokens)
+
+	results := make([]matchResult, 0, len(args))
+	for detailRows.Next() {
+		// Only tags is COALESCEd by the query. The other text columns are
+		// read as nullable, so a NULL becomes "" instead of failing the scan.
+		var (
+			id                    string
+			name, lang, sig, desc sql.NullString
+			tags                  string
+		)
+		if err := detailRows.Scan(&id, &name, &lang, &sig, &desc, &tags); err != nil {
+			return nil, fmt.Errorf("scanning detail row: %w", err)
+		}
+		row := fts5Row{
+			id:          id,
+			name:        name.String,
+			lang:        lang.String,
+			signature:   sig.String,
+			description: desc.String,
+			tags:        tags,
+			rank:        rankByID[id],
+		}
+		score := scoreHit(row, tokens, hasPython, hasBash)
+		results = append(results, matchResult{
+			ID: row.id,
+			// RawScore keeps the absolute value for confidence gates.
+			// Normalisation makes the top hit 1.0 even for weak queries
+			// where it is merely the least bad candidate.
+			RawScore:  score,
+			Score:     score, // normalised and rounded below
+			Signature: row.signature,
+			Snippet:   snippet(row.description, 120),
+			Lang:      row.lang,
+			Name:      row.name,
+			Tags:      row.tags,
+		})
+	}
+	if err := detailRows.Err(); err != nil {
+		return nil, fmt.Errorf("reading detail rows: %w", err)
+	}
+
+	// Best score first. The detail query has no ORDER BY, so ties are broken
+	// by id to keep the output reproducible.
+	sort.Slice(results, func(i, j int) bool {
+		if results[i].Score != results[j].Score {
+			return results[i].Score > results[j].Score
+		}
+		return results[i].ID < results[j].ID
+	})
+
+	// Normalise scores so the top result is 1.0 and the rest are relative.
+	// This makes the output stable and meaningful regardless of token count.
+	if len(results) > 0 && results[0].Score > 0 {
+		maxScore := results[0].Score
+		for i := range results {
+			results[i].Score = math.Round((results[i].Score/maxScore)*1000) / 1000
+		}
+	}
+
+	// Filter by minimum normalised score.
+	var filtered []matchResult
+	for _, r := range results {
+		if r.Score >= minScore {
+			filtered = append(filtered, r)
+		}
+	}
+
+	// Limit to topN; the sign check avoids a slice-bounds panic.
+	if topN >= 0 && len(filtered) > topN {
+		filtered = filtered[:topN]
+	}
+
+	return filtered, nil
+}
+
+// --- command -----------------------------------------------------------
+
+// cmdMatch implements `fn match`: it matches a shell command against the
+// registry and prints the best candidates as JSON (default) or text.
+//
+// Recognised flags: --top/-n N, --format/-f json|text, --min-score F and
+// --help/-h. Every other argument that does not start with "-" is part of
+// the command; several such arguments are joined with single spaces, so an
+// unquoted command is not reduced to its last word. If no command is given
+// on the command line and stdin is not a terminal, the command is read from
+// stdin.
+//
+// Invalid or missing flag values, unknown flags and unknown formats are
+// reported on stderr. The default (or JSON output) is then used, so stdout
+// and the exit status stay as they were before the warning was added.
+//
+// The process exits with status 1 when no command is provided or runMatch
+// returns an error.
+func cmdMatch(args []string) {
+	topN := 3
+	format := "json"
+	minScore := 0.3
+	var positional []string
+
+	for i := 0; i < len(args); i++ {
+		arg := args[i]
+		switch arg {
+		case "--top", "-n":
+			i++
+			if i >= len(args) {
+				fmt.Fprintf(os.Stderr, "fn match: %s needs a value; keeping %d\n", arg, topN)
+			} else if n, err := strconv.Atoi(args[i]); err != nil || n <= 0 {
+				fmt.Fprintf(os.Stderr, "fn match: ignoring invalid %s value %q; keeping %d\n", arg, args[i], topN)
+			} else {
+				topN = n
+			}
+		case "--format", "-f":
+			i++
+			if i >= len(args) {
+				fmt.Fprintf(os.Stderr, "fn match: %s needs a value; keeping %s\n", arg, format)
+			} else {
+				format = args[i]
+				if format != "json" && format != "text" {
+					fmt.Fprintf(os.Stderr, "fn match: unknown format %q; falling back to json\n", format)
+				}
+			}
+		case "--min-score":
+			i++
+			if i >= len(args) {
+				fmt.Fprintf(os.Stderr, "fn match: %s needs a value; keeping %g\n", arg, minScore)
+			} else if f, err := strconv.ParseFloat(args[i], 64); err != nil {
+				fmt.Fprintf(os.Stderr, "fn match: ignoring invalid %s value %q; keeping %g\n", arg, args[i], minScore)
+			} else {
+				minScore = f
+			}
+		case "--help", "-h":
+			fmt.Println(`fn match — fuzzy matcher between a shell command and registry functions
+
+Usage:
+  fn match [--top N] [--format json|text] [--min-score F] "<command>"
+  echo "<command>" | fn match [--top N] [--format json|text] [--min-score F]
+
+Flags:
+  --top N        Return top N results (default: 3)
+  --format       Output format: json (default) or text
+  --min-score F  Minimum score threshold 0..1 (default: 0.3)
+
+Example:
+  fn match "taskkill.exe /IM registry_dashboard.exe /F"
+  fn match --top 5 --format text "curl -sf https://api.example.com/health"
+  echo "rsync -avz --exclude .git src/ user@host:/opt/app" | fn match`)
+			return
+		default:
+			if strings.HasPrefix(arg, "-") {
+				fmt.Fprintf(os.Stderr, "fn match: ignoring unknown flag %q\n", arg)
+			} else {
+				positional = append(positional, arg)
+			}
+		}
+	}
+	queryArg := strings.Join(positional, " ")
+
+	// Try stdin if no positional arg was given and stdin is piped/redirected.
+	if queryArg == "" {
+		stat, err := os.Stdin.Stat()
+		if err == nil && (stat.Mode()&os.ModeCharDevice) == 0 {
+			var sb strings.Builder
+			buf := make([]byte, 4096)
+			for {
+				n, err := os.Stdin.Read(buf)
+				if n > 0 {
+					sb.Write(buf[:n])
+				}
+				if err != nil {
+					// io.EOF or a read failure: use what was read so far.
+					break
+				}
+			}
+			queryArg = strings.TrimSpace(sb.String())
+		}
+	}
+
+	if queryArg == "" {
+		fmt.Fprintln(os.Stderr, "fn match: no command provided. Use --help for usage.")
+		os.Exit(1)
+	}
+
+	dbPath := filepath.Join(root(), dbName)
+	hits, err := runMatch(dbPath, queryArg, topN, minScore)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "fn match: %v\n", err)
+		os.Exit(1)
+	}
+
+	// Compute the high_confidence flag with a double gate:
+	//   1. The top hit's RAW score must reach minRawForHighConf. The
+	//      normalised score cannot be used, because the top hit is always 1.0
+	//      after normalisation, even for a query that matches nothing well.
+	//      Raw scores are built from the per-token field bonuses (name 3.0,
+	//      tags 2.0, signature 1.5, description 1.0) plus a base of at most 1.
+	//   2. The raw top1/top2 ratio must exceed 1.5, so the top hit stands out
+	//      instead of being one of a cluster of similar matches.
+	const minRawForHighConf = 4.0
+	highConf := false
+	if len(hits) >= 1 && hits[0].RawScore >= minRawForHighConf {
+		if len(hits) >= 2 && hits[1].RawScore > 0 {
+			highConf = hits[0].RawScore/hits[1].RawScore > 1.5
+		} else {
+			highConf = true // a single hit with a high raw score
+		}
+	}
+
+	switch format {
+	case "text":
+		printMatchText(queryArg, hits, highConf)
+	default:
+		printMatchJSON(queryArg, hits, highConf)
+	}
+}
+
+// printMatchJSON writes the matchOutput envelope for query to stdout as
+// two-space-indented JSON. A nil hits slice is emitted as an empty array
+// rather than null. If encoding or writing fails, the error is reported on
+// stderr and the process exits with status 1, so a caller parsing stdout
+// does not mistake truncated output for success.
+func printMatchJSON(query string, hits []matchResult, highConf bool) {
+	out := matchOutput{
+		Query:          query,
+		Top:            hits,
+		HighConfidence: highConf,
+	}
+	if out.Top == nil {
+		out.Top = []matchResult{}
+	}
+	enc := json.NewEncoder(os.Stdout)
+	enc.SetIndent("", "  ")
+	if err := enc.Encode(out); err != nil {
+		fmt.Fprintf(os.Stderr, "fn match: encoding JSON: %v\n", err)
+		os.Exit(1)
+	}
+}
+
+// printMatchText writes a human-readable report to stdout: a header line
+// with the query (tagged " [HIGH CONFIDENCE]" when highConf is set), then,
+// for every hit, its score with three decimals and id, its signature and its
+// snippet. With no hits a placeholder line is printed instead.
+func printMatchText(query string, hits []matchResult, highConf bool) {
+	var badge string
+	if highConf {
+		badge = " [HIGH CONFIDENCE]"
+	}
+	fmt.Printf("TOP MATCHES for: %s%s\n", query, badge)
+
+	if len(hits) > 0 {
+		for i := range hits {
+			hit := &hits[i]
+			fmt.Printf("  [%.3f] %s\n", hit.Score, hit.ID)
+			fmt.Printf("         %s\n", hit.Signature)
+			fmt.Printf("         %s\n", hit.Snippet)
+		}
+		return
+	}
+	fmt.Println("  (no matches above threshold)")
+}