feat(infra): auto-commit con 29 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+19
-2
@@ -12,11 +12,14 @@ import (
|
||||
|
||||
func cmdDoctor(args []string) {
|
||||
jsonOut := false
|
||||
emitClaudeMd := false
|
||||
sub := ""
|
||||
for _, a := range args {
|
||||
switch a {
|
||||
case "--json":
|
||||
jsonOut = true
|
||||
case "--emit-claude-md":
|
||||
emitClaudeMd = true
|
||||
case "-h", "--help":
|
||||
doctorUsage()
|
||||
return
|
||||
@@ -51,7 +54,11 @@ func cmdDoctor(args []string) {
|
||||
case "copied-code":
|
||||
doctorCopiedCode(r, jsonOut)
|
||||
case "capabilities":
|
||||
doctorCapabilities(r, jsonOut)
|
||||
if emitClaudeMd {
|
||||
doctorCapabilitiesEmitMd(r)
|
||||
} else {
|
||||
doctorCapabilities(r, jsonOut)
|
||||
}
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "unknown doctor subcommand: %s\n", sub)
|
||||
doctorUsage()
|
||||
@@ -79,7 +86,8 @@ Subcommands:
|
||||
capabilities Drift entre docs/capabilities/INDEX.md, tags de funciones, y paginas <grupo>.md (issue 0086)
|
||||
|
||||
Flags:
|
||||
--json Salida JSON (para scripting/agentes)`)
|
||||
--json Salida JSON (para scripting/agentes)
|
||||
--emit-claude-md (solo capabilities) Genera bloque markdown para CLAUDE.md`)
|
||||
}
|
||||
|
||||
func doctorAll(root string, jsonOut bool) {
|
||||
@@ -432,6 +440,15 @@ func doctorCapabilities(root string, jsonOut bool) {
|
||||
fmt.Printf("\n%d/%d capability groups healthy.\n", len(audits)-bad, len(audits))
|
||||
}
|
||||
|
||||
func doctorCapabilitiesEmitMd(root string) {
|
||||
result, err := infra.EmitCapabilitiesMd(root)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
fmt.Print(infra.RenderCapabilitiesMd(result))
|
||||
}
|
||||
|
||||
func doctorCopiedCode(root string, jsonOut bool) {
|
||||
entries, err := infra.AuditCopiedCode(root)
|
||||
if err != nil {
|
||||
|
||||
+5
-1
@@ -49,6 +49,8 @@ func main() {
|
||||
cmdVault(os.Args[2:])
|
||||
case "doctor":
|
||||
cmdDoctor(os.Args[2:])
|
||||
case "match":
|
||||
cmdMatch(os.Args[2:])
|
||||
case "help", "-h", "--help":
|
||||
printUsage()
|
||||
default:
|
||||
@@ -77,7 +79,9 @@ Usage:
|
||||
fn sync [status|locations] Sincroniza con servidor central
|
||||
fn vault <list|search|index|info> Gestiona y busca en data vaults
|
||||
fn doctor [artefacts|services|sync|uses-functions|unused] [--json]
|
||||
Diagnostico read-only del registry`)
|
||||
Diagnostico read-only del registry
|
||||
fn match [--top N] [--format json|text] [--min-score F] "<cmd>"
|
||||
Fuzzy match entre comando shell y funciones del registry`)
|
||||
}
|
||||
|
||||
func root() string {
|
||||
|
||||
+553
@@ -0,0 +1,553 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
// matchResult describes one candidate function returned by the matcher.
// The first five fields are serialized to JSON; the remaining ones are
// tagged `json:"-"` and only travel in-process.
type matchResult struct {
	ID        string  `json:"id"`
	Score     float64 `json:"score"`     // relative score: the best hit is 1.0
	RawScore  float64 `json:"raw_score"` // absolute score before normalization; use it for confidence gates
	Signature string  `json:"signature"`
	Snippet   string  `json:"snippet"`

	// Metadata kept alongside the hit but never serialized.
	Lang, Name, Tags string `json:"-"`

	// HighConfidence is meant to be filled in after ranking (per the
	// original note); it is not serialized.
	HighConfidence bool `json:"-"`
}

// matchOutput is the top-level JSON document printed by `fn match`: the
// query as given, the ranked hits, and the overall confidence verdict.
type matchOutput struct {
	Query          string        `json:"query"`
	Top            []matchResult `json:"top"`
	HighConfidence bool          `json:"high_confidence"`
}
|
||||
|
||||
// fts5Row carries the columns of one candidate function plus the BM25 rank
// reported for it by the FTS query; it is the input to scoreHit.
type fts5Row struct {
	id, name, lang               string
	signature, description, tags string
	rank                         float64
}
|
||||
|
||||
// --- tokenizer ---------------------------------------------------------
|
||||
|
||||
var (
	reNonAlnum   = regexp.MustCompile(`[^a-zA-Z0-9]+`)
	reFlag       = regexp.MustCompile(`^-{1,2}[a-zA-Z]`)
	reAbsPath    = regexp.MustCompile(`^(/|[A-Za-z]:\\|\\\\)`)
	rePureNumber = regexp.MustCompile(`^\d+$`)
)

// domainStopwords are tokens so generic in this codebase that they add noise
// rather than signal to the matcher (they match hundreds of functions equally).
var domainStopwords = map[string]bool{
	"registry": true, "function": true, "functions": true,
	"app": true, "apps": true, "file": true, "files": true,
	"get": true, "set": true, "run": true, "list": true, "add": true,
	"new": true, "all": true, "the": true, "and": true, "for": true,
	"use": true, "fmt": true, "log": true, "err": true, "nil": true,
	"true": true, "false": true, "var": true, "val": true, "str": true,
	"tmp": true, "out": true, "src": true, "dst": true, "opt": true,
	"usr": true, "etc": true, "bin": true, "lib": true, "mnt": true,
	"home": true, "root": true, "host": true, "user": true, "name": true,
	"path": true, "type": true, "data": true, "info": true, "init": true,
	"main": true, "test": true, "util": true, "base": true, "core": true,
	"api": true, "url": true, "uri": true, "http": true, "html": true,
	"json": true, "yaml": true, "toml": true, "conf": true, "config": true,
	"dir": true, "map": true, "key": true, "obj": true,
	"ctx": true, "pkg": true, "mod": true, "cmd": true, "cli": true,
	"help": true, "read": true, "open": true, "close": true, "stop": true,
	"start": true, "end": true, "begin": true, "done": true, "make": true,
	"build": true, "check": true, "scan": true, "load": true, "save": true,
	"send": true, "recv": true, "show": true, "print": true, "write": true,
	"create": true, "update": true, "delete": true, "remove": true,
	"desktop": true, "lucas": true, "windows": true, "linux": true,
}

// tokenize splits a shell command into significant lowercase tokens, in
// order of first appearance and without duplicates.
//
// Discarded: dash flags (-v, --port), Windows-style switches (/F, /IM),
// tokens shorter than 3 bytes, pure numbers and domainStopwords.
//
// Paths (anything containing / or \, or matching reAbsPath) are reduced to
// their basename; the extension is emitted as its own token before the
// basename parts, subject to the same filters as every other token.
//
// A word that starts with '/' is treated as a switch only when it has no
// further path separator, so "/IM" is dropped while "/opt/tools/run.py"
// still contributes its basename. A single-segment absolute path such as
// "/tmp" is indistinguishable from a switch and is dropped.
func tokenize(cmd string) []string {
	// Replace common shell operators with spaces so they act as separators.
	cmd = strings.NewReplacer("|", " ", ";", " ", "&&", " ", "||", " ",
		"(", " ", ")", " ", "{", " ", "}", " ").Replace(cmd)

	const pathSeps = `/\`

	seen := map[string]bool{}
	var tokens []string

	// keep applies every token filter in one place so that extension
	// tokens and basename parts are treated identically.
	keep := func(raw string) {
		tok := strings.ToLower(raw)
		if len(tok) < 3 || rePureNumber.MatchString(tok) || seen[tok] || domainStopwords[tok] {
			return
		}
		seen[tok] = true
		tokens = append(tokens, tok)
	}

	for _, p := range strings.Fields(cmd) {
		slashSwitch := len(p) > 1 && p[0] == '/' && !strings.ContainsAny(p[1:], pathSeps)
		if reFlag.MatchString(p) || slashSwitch {
			continue
		}

		if reAbsPath.MatchString(p) || strings.ContainsAny(p, pathSeps) {
			// Compute the basename by hand: filepath.Base only honours the
			// host OS separator, which would make tokenization differ
			// between platforms for the same command line.
			p = strings.TrimRight(p, pathSeps)
			if i := strings.LastIndexAny(p, pathSeps); i >= 0 {
				p = p[i+1:]
			}
			if ext := filepath.Ext(p); ext != "" {
				p = strings.TrimSuffix(p, ext)
				keep(strings.TrimPrefix(ext, "."))
			}
		}

		for _, sp := range reNonAlnum.Split(p, -1) {
			keep(sp)
		}
	}
	return tokens
}
|
||||
|
||||
// buildFTSQuery joins tokens into an FTS5 query of the form `a OR b OR c`.
//
// A token is emitted bare only when it is a valid FTS5 bareword: made up
// solely of ASCII letters, digits, underscores or non-ASCII characters, and
// not one of the case-sensitive keywords AND, OR, NOT, NEAR. Every other
// token is wrapped in double quotes, with embedded double quotes doubled,
// so it is always parsed as a string rather than as query syntax.
//
// Empty tokens are skipped. The result is "" when nothing remains.
func buildFTSQuery(tokens []string) string {
	isBareword := func(tok string) bool {
		switch tok {
		case "AND", "OR", "NOT", "NEAR":
			return false
		}
		for _, c := range tok {
			switch {
			case c == '_', c > 127:
			case c >= '0' && c <= '9':
			case c >= 'a' && c <= 'z':
			case c >= 'A' && c <= 'Z':
			default:
				return false
			}
		}
		return true
	}

	parts := make([]string, 0, len(tokens))
	for _, tok := range tokens {
		if tok == "" {
			// An empty operand would yield "a OR  OR b", a syntax error.
			continue
		}
		if isBareword(tok) {
			parts = append(parts, tok)
			continue
		}
		parts = append(parts, `"`+strings.ReplaceAll(tok, `"`, `""`)+`"`)
	}
	return strings.Join(parts, " OR ")
}
|
||||
|
||||
// --- language penalty heuristics ---------------------------------------
|
||||
|
||||
// pythonMarkers lists tokens treated as evidence that a query is Python code.
var pythonMarkers = map[string]bool{
	"def":    true,
	"import": true,
	"class":  true,
	"elif":   true,
	"self":   true,
	"lambda": true,
	"yield":  true,
	"async":  true,
	"await":  true,
	"with":   true,
}

// bashMarkers lists tokens treated as evidence that a query is a shell
// (or Windows console) command.
var bashMarkers = map[string]bool{
	"chmod":      true,
	"chown":      true,
	"grep":       true,
	"awk":        true,
	"sed":        true,
	"curl":       true,
	"wget":       true,
	"ssh":        true,
	"rsync":      true,
	"systemctl":  true,
	"apt":        true,
	"yum":        true,
	"taskkill":   true,
	"cmd":        true,
	"powershell": true,
	"exe":        true,
	"bat":        true,
}

// hasPythonMarkers reports whether at least one token is flagged in
// pythonMarkers.
func hasPythonMarkers(tokens []string) bool {
	found := false
	for i := 0; i < len(tokens) && !found; i++ {
		found = pythonMarkers[tokens[i]]
	}
	return found
}

// hasBashMarkers reports whether at least one token is flagged in
// bashMarkers.
func hasBashMarkers(tokens []string) bool {
	found := false
	for i := 0; i < len(tokens) && !found; i++ {
		found = bashMarkers[tokens[i]]
	}
	return found
}
|
||||
|
||||
// --- scoring -----------------------------------------------------------
|
||||
|
||||
// scoreHit computes a composite score for a single FTS5 hit.
|
||||
// bm25 from SQLite is negative (more negative = better match).
|
||||
// Scoring uses an additive boost model: each token that matches a field
|
||||
// contributes a flat bonus (name=3.0, tags=2.0, signature=1.5). The total
|
||||
// bonus is added to the base BM25 score, not multiplied per-token. This
|
||||
// prevents runaway clamping when many tokens all match different functions
|
||||
// equally (dashboard + registry + exe → all score 1.0 with the old model).
|
||||
func scoreHit(row fts5Row, tokens []string, hasPython, hasBash bool) float64 {
|
||||
// Base score from BM25 rank (negative -> positive, bounded [0,1])
|
||||
base := 1.0 / (1.0 + math.Abs(row.rank))
|
||||
|
||||
nameLower := strings.ToLower(row.name)
|
||||
tagsLower := strings.ToLower(row.tags)
|
||||
sigLower := strings.ToLower(row.signature)
|
||||
descLower := strings.ToLower(row.description)
|
||||
|
||||
var boost float64
|
||||
for _, tok := range tokens {
|
||||
// Use best-field bonus per token (additive across tokens, not multiplicative)
|
||||
tokBoost := 0.0
|
||||
if strings.Contains(nameLower, tok) && tokBoost < 3.0 {
|
||||
tokBoost = 3.0
|
||||
}
|
||||
if strings.Contains(tagsLower, tok) && tokBoost < 2.0 {
|
||||
tokBoost = 2.0
|
||||
}
|
||||
if strings.Contains(sigLower, tok) && tokBoost < 1.5 {
|
||||
tokBoost = 1.5
|
||||
}
|
||||
if strings.Contains(descLower, tok) && tokBoost < 1.0 {
|
||||
tokBoost = 1.0
|
||||
}
|
||||
boost += tokBoost
|
||||
}
|
||||
|
||||
// Language penalties (applied to total, not per-token)
|
||||
penalty := 1.0
|
||||
langLower := strings.ToLower(row.lang)
|
||||
if hasPython && langLower == "bash" {
|
||||
penalty = 0.5
|
||||
}
|
||||
if hasBash && langLower == "py" {
|
||||
penalty = 0.5
|
||||
}
|
||||
|
||||
// No clamping — scores differentiate via normalisation in the caller
|
||||
return (base + boost) * penalty
|
||||
}
|
||||
|
||||
// snippet condenses description into a single-line preview of at most
// maxLen bytes, plus a trailing "..." when it had to be shortened.
//
// Control characters other than tab are replaced by spaces and the result
// is trimmed. When truncation is needed the cut never lands inside a
// multi-byte UTF-8 sequence, and it moves back to the last space if that
// space lies beyond the first half of the budget, so words are not chopped
// needlessly. A negative maxLen is treated as 0.
func snippet(description string, maxLen int) string {
	clean := strings.TrimSpace(strings.Map(func(r rune) rune {
		if unicode.IsControl(r) && r != '\t' {
			return ' '
		}
		return r
	}, description))

	if maxLen < 0 {
		maxLen = 0
	}
	if len(clean) <= maxLen {
		return clean
	}

	// Step back over UTF-8 continuation bytes (10xxxxxx) so the cut falls
	// on a rune boundary. end < len(clean) holds here, so indexing is safe.
	end := maxLen
	for end > 0 && clean[end]&0xC0 == 0x80 {
		end--
	}

	cut := clean[:end]
	if idx := strings.LastIndex(cut, " "); idx > maxLen/2 {
		cut = cut[:idx]
	}
	return cut + "..."
}
|
||||
|
||||
// --- FTS5 query --------------------------------------------------------
|
||||
|
||||
// ftsOnlyQuery returns id + rank from the FTS virtual table only.
|
||||
// bm25() must be used without JOIN — it only works in direct FTS queries.
|
||||
const ftsOnlyQuery = `
|
||||
SELECT id, bm25(functions_fts) AS rank
|
||||
FROM functions_fts
|
||||
WHERE functions_fts MATCH ?
|
||||
ORDER BY rank
|
||||
LIMIT 50
|
||||
`
|
||||
|
||||
// fnDetailQuery fetches metadata for a list of IDs.
|
||||
const fnDetailQuery = `
|
||||
SELECT id, name, lang, signature, description, COALESCE(tags, '[]')
|
||||
FROM functions
|
||||
WHERE id IN (%s)
|
||||
`
|
||||
|
||||
func runMatch(dbPath string, query string, topN int, minScore float64) ([]matchResult, error) {
|
||||
tokens := tokenize(query)
|
||||
if len(tokens) == 0 {
|
||||
return nil, fmt.Errorf("no significant tokens extracted from: %q", query)
|
||||
}
|
||||
|
||||
ftsQ := buildFTSQuery(tokens)
|
||||
if ftsQ == "" {
|
||||
return nil, fmt.Errorf("could not build FTS query")
|
||||
}
|
||||
|
||||
// Open normally (not strict read-only) so WAL frames are visible.
|
||||
// bm25() with mode=ro fails with "missing row from content table" when
|
||||
// the WAL has not been checkpointed — the FTS index references rows that
|
||||
// aren't in the main db file yet. We never write anything here.
|
||||
conn, err := sql.Open("sqlite3", dbPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening db: %w", err)
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
// Step 1: FTS-only query to get ids + bm25 ranks (no JOIN)
|
||||
ftsRows, err := conn.Query(ftsOnlyQuery, ftsQ)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("fts query: %w", err)
|
||||
}
|
||||
type idRank struct {
|
||||
id string
|
||||
rank float64
|
||||
}
|
||||
var ranked []idRank
|
||||
for ftsRows.Next() {
|
||||
var r idRank
|
||||
if err := ftsRows.Scan(&r.id, &r.rank); err != nil {
|
||||
continue
|
||||
}
|
||||
ranked = append(ranked, r)
|
||||
}
|
||||
ftsRows.Close()
|
||||
|
||||
if len(ranked) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Step 2: fetch metadata for those IDs with a regular SELECT
|
||||
rankMap := make(map[string]float64, len(ranked))
|
||||
ids := make([]string, 0, len(ranked))
|
||||
placeholders := make([]string, 0, len(ranked))
|
||||
args := make([]any, 0, len(ranked))
|
||||
for _, r := range ranked {
|
||||
rankMap[r.id] = r.rank
|
||||
ids = append(ids, r.id)
|
||||
placeholders = append(placeholders, "?")
|
||||
args = append(args, r.id)
|
||||
}
|
||||
|
||||
detailSQL := fmt.Sprintf(fnDetailQuery, strings.Join(placeholders, ","))
|
||||
detailRows, err := conn.Query(detailSQL, args...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("detail query: %w", err)
|
||||
}
|
||||
defer detailRows.Close()
|
||||
|
||||
hasPython := hasPythonMarkers(tokens)
|
||||
hasBash := hasBashMarkers(tokens)
|
||||
|
||||
var results []matchResult
|
||||
for detailRows.Next() {
|
||||
var r fts5Row
|
||||
if err := detailRows.Scan(&r.id, &r.name, &r.lang, &r.signature, &r.description, &r.tags); err != nil {
|
||||
continue
|
||||
}
|
||||
r.rank = rankMap[r.id]
|
||||
score := scoreHit(r, tokens, hasPython, hasBash)
|
||||
results = append(results, matchResult{
|
||||
ID: r.id,
|
||||
Score: score, // rounded after normalisation below
|
||||
Signature: r.signature,
|
||||
Snippet: snippet(r.description, 120),
|
||||
Lang: r.lang,
|
||||
Name: r.name,
|
||||
Tags: r.tags,
|
||||
})
|
||||
}
|
||||
|
||||
// Sort by score descending
|
||||
sort.Slice(results, func(i, j int) bool {
|
||||
return results[i].Score > results[j].Score
|
||||
})
|
||||
|
||||
// Preserva raw_score (absoluto) ANTES de normalizar — sirve para gates
|
||||
// de confidence absoluto. La normalizacion estetica enmascara queries
|
||||
// debiles donde el top hit es solo el "menos malo" pero realmente no
|
||||
// matchea — sin raw, high_confidence sobre normalized siempre dispara.
|
||||
for i := range results {
|
||||
results[i].RawScore = results[i].Score
|
||||
}
|
||||
// Normalise scores so the top result is 1.0 and the rest are relative.
|
||||
// This makes the output stable and meaningful regardless of token count.
|
||||
if len(results) > 0 && results[0].Score > 0 {
|
||||
maxScore := results[0].Score
|
||||
for i := range results {
|
||||
results[i].Score = math.Round((results[i].Score/maxScore)*1000) / 1000
|
||||
}
|
||||
}
|
||||
|
||||
// Filter by min score
|
||||
var filtered []matchResult
|
||||
for _, r := range results {
|
||||
if r.Score >= minScore {
|
||||
filtered = append(filtered, r)
|
||||
}
|
||||
}
|
||||
|
||||
// Limit to topN
|
||||
if len(filtered) > topN {
|
||||
filtered = filtered[:topN]
|
||||
}
|
||||
|
||||
return filtered, nil
|
||||
}
|
||||
|
||||
// --- command -----------------------------------------------------------
|
||||
|
||||
func cmdMatch(args []string) {
|
||||
topN := 3
|
||||
format := "json"
|
||||
minScore := 0.3
|
||||
var queryArg string
|
||||
|
||||
for i := 0; i < len(args); i++ {
|
||||
switch args[i] {
|
||||
case "--top", "-n":
|
||||
i++
|
||||
if i < len(args) {
|
||||
if n, err := strconv.Atoi(args[i]); err == nil && n > 0 {
|
||||
topN = n
|
||||
}
|
||||
}
|
||||
case "--format", "-f":
|
||||
i++
|
||||
if i < len(args) {
|
||||
format = args[i]
|
||||
}
|
||||
case "--min-score":
|
||||
i++
|
||||
if i < len(args) {
|
||||
if f, err := strconv.ParseFloat(args[i], 64); err == nil {
|
||||
minScore = f
|
||||
}
|
||||
}
|
||||
case "--help", "-h":
|
||||
fmt.Println(`fn match — fuzzy matcher between a shell command and registry functions
|
||||
|
||||
Usage:
|
||||
fn match [--top N] [--format json|text] [--min-score F] "<command>"
|
||||
echo "<command>" | fn match [--top N] [--format json|text] [--min-score F]
|
||||
|
||||
Flags:
|
||||
--top N Return top N results (default: 3)
|
||||
--format Output format: json (default) or text
|
||||
--min-score F Minimum score threshold 0..1 (default: 0.3)
|
||||
|
||||
Example:
|
||||
fn match "taskkill.exe /IM registry_dashboard.exe /F"
|
||||
fn match --top 5 --format text "curl -sf https://api.example.com/health"
|
||||
echo "rsync -avz --exclude .git src/ user@host:/opt/app" | fn match`)
|
||||
return
|
||||
default:
|
||||
if !strings.HasPrefix(args[i], "-") {
|
||||
queryArg = args[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try stdin if no positional arg
|
||||
if queryArg == "" {
|
||||
stat, err := os.Stdin.Stat()
|
||||
if err == nil && (stat.Mode()&os.ModeCharDevice) == 0 {
|
||||
var sb strings.Builder
|
||||
buf := make([]byte, 4096)
|
||||
for {
|
||||
n, err := os.Stdin.Read(buf)
|
||||
if n > 0 {
|
||||
sb.Write(buf[:n])
|
||||
}
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
queryArg = strings.TrimSpace(sb.String())
|
||||
}
|
||||
}
|
||||
|
||||
if queryArg == "" {
|
||||
fmt.Fprintln(os.Stderr, "fn match: no command provided. Use --help for usage.")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
dbPath := filepath.Join(root(), dbName)
|
||||
hits, err := runMatch(dbPath, queryArg, topN, minScore)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "fn match: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Compute high_confidence flag. Doble gate:
|
||||
// 1. RAW score >= 3.0 — al menos un token con match fuerte de campo
|
||||
// (name=3.0 / tags=2.0 / signature=1.5 / description=1.0). Sin esto,
|
||||
// la normalizacion devolveria 1.0 incluso para queries que no
|
||||
// matchean nada bien (ej. "kelly criterion" -> graph_renderer score
|
||||
// raw < 1.0 pero normalized = 1.0).
|
||||
// 2. Gap top1/top2 > 1.5 (en raw, no normalized) — el top destaca
|
||||
// sobre el siguiente, no es un cluster de matches mediocres.
|
||||
const minRawForHighConf = 4.0
|
||||
highConf := false
|
||||
if len(hits) >= 1 && hits[0].RawScore >= minRawForHighConf {
|
||||
if len(hits) >= 2 && hits[1].RawScore > 0 {
|
||||
highConf = hits[0].RawScore/hits[1].RawScore > 1.5
|
||||
} else {
|
||||
highConf = true // solo un hit con raw alta
|
||||
}
|
||||
}
|
||||
|
||||
switch format {
|
||||
case "text":
|
||||
printMatchText(queryArg, hits, highConf)
|
||||
default:
|
||||
printMatchJSON(queryArg, hits, highConf)
|
||||
}
|
||||
}
|
||||
|
||||
func printMatchJSON(query string, hits []matchResult, highConf bool) {
|
||||
out := matchOutput{
|
||||
Query: query,
|
||||
Top: hits,
|
||||
HighConfidence: highConf,
|
||||
}
|
||||
if out.Top == nil {
|
||||
out.Top = []matchResult{}
|
||||
}
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
enc.Encode(out)
|
||||
}
|
||||
|
||||
func printMatchText(query string, hits []matchResult, highConf bool) {
|
||||
conf := ""
|
||||
if highConf {
|
||||
conf = " [HIGH CONFIDENCE]"
|
||||
}
|
||||
fmt.Printf("TOP MATCHES for: %s%s\n", query, conf)
|
||||
if len(hits) == 0 {
|
||||
fmt.Println(" (no matches above threshold)")
|
||||
return
|
||||
}
|
||||
for _, h := range hits {
|
||||
fmt.Printf(" [%.3f] %s\n", h.Score, h.ID)
|
||||
fmt.Printf(" %s\n", h.Signature)
|
||||
fmt.Printf(" %s\n", h.Snippet)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user