dff0c0d2b7
Extractores nuevos en python/functions/cybersecurity/:
- extract_ip_addresses (IPv4 + IPv6 con validación mediante ipaddress)
- extract_emails (RFC 5322 simplificado)
- extract_domains (FQDNs con TLD válido, lista estática)
- extract_file_hashes (MD5/SHA1/SHA256/SHA512, algoritmo deducido por longitud)
- extract_crypto_wallets (BTC legacy + bech32, ETH 0x+40hex)
- extract_cve_ids (CVE-YYYY-NNNN+)
- extract_mac_addresses (xx:xx:xx + xx-xx-xx, separador uniforme)
- extract_phone_numbers (E.164 + ES local 9 dígitos)

Pipeline:
- extract_iocs ejecuta todos los extractores y deduplica los spans contenidos en otros. Se declara como kind:function con uses_functions no vacío (y no como pipeline) para poder mantener purity:pure, porque la regla del registry exige que los pipelines sean impuros.

Todas devuelven list[dict] con value/start/end/type para que el caller (issues 0038-0040) pueda reconciliar offsets con spans NER sin reparsing.

Refs #0037

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
48 lines
1.2 KiB
Python
48 lines
1.2 KiB
Python
# Helpers re-exported from the ``cybersecurity`` submodule.
from .cybersecurity import (
    hash_sha256,
    hash_md5,
    entropy_shannon,
    detect_sql_injection,
    extract_urls,
    is_base64,
    is_hex,
    levenshtein_distance,
    jaccard_similarity,
    normalize_url,
    envelope_encrypt,
    envelope_decrypt,
)

# Extractors, each imported from a submodule that shares the function's name.
from .extract_ip_addresses import extract_ip_addresses
from .extract_emails import extract_emails
from .extract_domains import extract_domains
from .extract_file_hashes import extract_file_hashes
from .extract_crypto_wallets import extract_crypto_wallets
from .extract_cve_ids import extract_cve_ids
from .extract_mac_addresses import extract_mac_addresses
from .extract_phone_numbers import extract_phone_numbers
from .extract_iocs import extract_iocs

# Names exported by ``from <package> import *``; the list mirrors, in the same
# order, the names this module imports from its submodules.
__all__ = [
    # Imported from .cybersecurity
    "hash_sha256",
    "hash_md5",
    "entropy_shannon",
    "detect_sql_injection",
    "extract_urls",
    "is_base64",
    "is_hex",
    "levenshtein_distance",
    "jaccard_similarity",
    "normalize_url",
    "envelope_encrypt",
    "envelope_decrypt",
    # Imported from the per-extractor submodules
    "extract_ip_addresses",
    "extract_emails",
    "extract_domains",
    "extract_file_hashes",
    "extract_crypto_wallets",
    "extract_cve_ids",
    "extract_mac_addresses",
    "extract_phone_numbers",
    "extract_iocs",
]