"""Extrae hashes MD5/SHA1/SHA256/SHA512 de un texto, con offsets y algoritmo.""" import re # Mas largo primero para evitar que un SHA256 quede como SHA1+resto. _HASH_LENGTHS = ( (128, "sha512"), (64, "sha256"), (40, "sha1"), (32, "md5"), ) _HASH_CANDIDATE = re.compile(r"\b[A-Fa-f0-9]{32,128}\b") def extract_file_hashes(text: str) -> list[dict]: """Extrae hashes hex con su algoritmo deducido por longitud. Reconoce MD5 (32), SHA1 (40), SHA256 (64) y SHA512 (128). Hashes de longitudes intermedias se ignoran. Devuelve `algorithm` ademas de los campos estandar. """ results = [] for m in _HASH_CANDIDATE.finditer(text): candidate = m.group(0) length = len(candidate) algorithm = next( (algo for size, algo in _HASH_LENGTHS if size == length), None, ) if algorithm is None: continue results.append({ "value": candidate, "start": m.start(), "end": m.end(), "type": "file_hash", "algorithm": algorithm, }) return results