bd9383fd82
12 funciones puras con implementación real: HashSHA256, HashMD5, EntropyShannon, IsBase64, IsHex, ExtractURLs, ParseIPCIDR, IPInRange, NormalizeURL, DetectSQLInjection, LevenshteinDistance, JaccardSimilarity. 4 funciones impuras con implementación real (stdlib): LookupWhois, ResolveDNS, FetchHTTPHeaders, ScanPortTCP. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
38 lines
819 B
Go
38 lines
819 B
Go
package cybersecurity
|
|
|
|
// JaccardSimilarity computes the Jaccard similarity between two token
// collections, treating each slice as a set (repeated tokens count once).
//
// The result is |A ∩ B| / |A ∪ B| and lies between 0.0 (no shared tokens)
// and 1.0 (identical sets). Two empty inputs are treated as identical and
// yield 1.0; exactly one empty input yields 0.0.
func JaccardSimilarity(a, b []string) float64 {
	switch {
	case len(a) == 0 && len(b) == 0:
		return 1.0
	case len(a) == 0 || len(b) == 0:
		return 0.0
	}

	// Bit flags recording which of the two inputs a token appeared in.
	const (
		inA uint8 = 1 << iota
		inB
	)

	// One entry per distinct token across both inputs, so the number of
	// entries is the size of the union.
	membership := make(map[string]uint8, len(a)+len(b))
	for _, tok := range a {
		membership[tok] |= inA
	}
	for _, tok := range b {
		membership[tok] |= inB
	}

	// Tokens flagged for both inputs make up the intersection.
	shared := 0
	for _, flags := range membership {
		if flags == inA|inB {
			shared++
		}
	}

	total := len(membership)
	if total == 0 {
		// Not reachable after the guards above (both inputs are non-empty);
		// kept as a defensive check against division by zero.
		return 0.0
	}

	return float64(shared) / float64(total)
}
|