935008ec3f
Añade el capability group `recon` (dominio cybersecurity + pipelines, Python),
con la política de archivado OSINT y página madre docs/capabilities/recon.md.
Lookups y sondeo (wrappers de CLI):
- whois_lookup, rdap_lookup, dns_records, ping_host, traceroute_host, nmap_scan
- save_scan_to_osint (sink común) + recon_osint (pipeline one-shot scan+archivado)
Escaneo de puertos/servicios nativo (stdlib, sin nmap ni sudo):
- scan_tcp_ports: connect-scan TCP concurrente (open/closed/filtered)
- grab_service_banner: banner grab + identificación de servicio/versión real
- identify_port_service: puro, puerto -> servicio IANA esperado (~120 puertos)
- scan_port_services: pipeline one-shot (scan -> identify + banner por puerto abierto)
Fingerprint de tecnología web (estilo Wappalyzer), patrón pura/impura:
- fetch_http_fingerprint: GET stdlib, recoge headers/html/cookies (solo nombres)
- detect_web_tech: puro, matchea ~50 firmas regex -> tecnologías por categoría
- fingerprint_web_stack: pipeline one-shot url -> tecnologías
Todas devuelven dict {status} sin lanzar. Tests: 43 verdes, sin red externa.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
192 lines
6.9 KiB
Python
192 lines
6.9 KiB
Python
"""Lookup WHOIS de un dominio o IP via el CLI `whois` del sistema.
|
|
|
|
Funcion IMPURA: ejecuta el binario `whois` (apt) como subproceso, captura el
|
|
stdout completo y parsea best-effort los campos de registro mas comunes. Es
|
|
OSINT pasivo: no toca al objetivo, solo el directorio WHOIS publico.
|
|
|
|
Devuelve siempre un dict (estilo del grupo recon): nunca lanza excepciones.
|
|
"""
|
|
|
|
import re
|
|
import subprocess
|
|
|
|
|
|
def _first_match(raw: str, *labels: str) -> str | None:
|
|
"""Devuelve el valor de la primera linea cuyo label coincide (case-insensitive).
|
|
|
|
Para cada label busca lineas del tipo ``Label: valor`` ignorando mayusculas
|
|
y espacios alrededor de los dos puntos. Devuelve el primer valor no vacio
|
|
encontrado, o None si ningun label aparece.
|
|
"""
|
|
for label in labels:
|
|
pattern = re.compile(
|
|
r"^\s*" + re.escape(label) + r"\s*:\s*(.+?)\s*$",
|
|
re.IGNORECASE | re.MULTILINE,
|
|
)
|
|
for m in pattern.finditer(raw):
|
|
value = m.group(1).strip()
|
|
if value:
|
|
return value
|
|
return None
|
|
|
|
|
|
def _all_matches(raw: str, *labels: str) -> list[str]:
|
|
"""Devuelve todos los valores (deduplicados, en orden) para los labels dados."""
|
|
out: list[str] = []
|
|
seen: set[str] = set()
|
|
for label in labels:
|
|
pattern = re.compile(
|
|
r"^\s*" + re.escape(label) + r"\s*:\s*(.+?)\s*$",
|
|
re.IGNORECASE | re.MULTILINE,
|
|
)
|
|
for m in pattern.finditer(raw):
|
|
value = m.group(1).strip()
|
|
if value and value.lower() not in seen:
|
|
seen.add(value.lower())
|
|
out.append(value)
|
|
return out
|
|
|
|
|
|
def parse_whois_raw(raw: str, target: str) -> dict:
    """Parse raw ``whois`` text into normalized fields on a best-effort basis.

    Helper used by ``whois_lookup`` and by the module's smoke test. Any field
    may be missing from the input: scalar fields are then None and
    ``name_servers`` is an empty list, because WHOIS output is not
    standardized and varies by TLD and registrar.

    Args:
        raw: Full stdout of the ``whois`` command.
        target: Domain or IP that was queried (echoed back in the result).

    Returns:
        Dict with ``status`` set to ``"ok"``, the untouched ``raw`` text and
        the parsed fields ``registrar``, ``registrant_country``,
        ``creation_date``, ``expiry_date``, ``updated_date`` and
        ``name_servers`` (lower-cased).
    """
    # Label aliases per field, listed in priority order.
    creation_labels = ("Creation Date", "created", "Created On", "Registered on")
    expiry_labels = (
        "Registry Expiry Date",
        "Expiry Date",
        "Expiration Date",
        "Registrar Registration Expiration Date",
        "Expiry",
        "expires",
    )
    updated_labels = ("Updated Date", "Last Modified", "last-modified", "changed")
    name_server_labels = ("Name Server", "nserver", "Nameservers")

    parsed: dict = {"status": "ok", "target": target, "raw": raw}
    parsed["registrar"] = _first_match(raw, "Registrar", "registrar")
    parsed["registrant_country"] = _first_match(raw, "Registrant Country", "Country")
    parsed["creation_date"] = _first_match(raw, *creation_labels)
    parsed["expiry_date"] = _first_match(raw, *expiry_labels)
    parsed["updated_date"] = _first_match(raw, *updated_labels)
    parsed["name_servers"] = list(
        map(str.lower, _all_matches(raw, *name_server_labels))
    )
    return parsed
|
|
|
|
|
|
def whois_lookup(target: str, timeout_s: int = 30) -> dict:
    """Run ``whois <target>`` and parse the common registration fields.

    IMPURE: spawns the ``whois`` CLI as a subprocess. On success the complete
    stdout is returned under ``raw`` and the common fields are extracted from
    it by ``parse_whois_raw``. Failures are reported as an error dict rather
    than raised.

    Args:
        target: Domain (e.g. ``"google.com"``) or IP address to query.
            Surrounding whitespace is stripped. Empty values, non-string
            values and values starting with ``-`` are rejected.
        timeout_s: Maximum seconds to wait for the subprocess (default 30).

    Returns:
        Success dict::

            {
                "status": "ok",
                "target": <target>,
                "raw": <full whois stdout>,
                "registrar": str | None,
                "registrant_country": str | None,
                "creation_date": str | None,
                "expiry_date": str | None,
                "updated_date": str | None,
                "name_servers": [str, ...],
            }

        For IP targets several domain-specific fields are left as None.
        Failure dict::

            {"status": "error", "error": "<message>", "target": <target>}
    """
    if not target or (isinstance(target, str) and not target.strip()):
        return {"status": "error", "error": "whois_lookup: target vacio", "target": target}
    if not isinstance(target, str):
        # Without this guard ``target.strip()`` would raise AttributeError.
        return {
            "status": "error",
            "error": f"whois_lookup: target debe ser str, no {type(target).__name__}",
            "target": target,
        }

    target = target.strip()

    # A leading "-" would be parsed by the whois CLI as a command-line option
    # (e.g. "-h <server>") instead of as the object to look up.
    if target.startswith("-"):
        return {
            "status": "error",
            "error": f"whois_lookup: target invalido '{target}' (no puede empezar por '-')",
            "target": target,
        }

    try:
        proc = subprocess.run(
            ["whois", target],
            capture_output=True,
            text=True,
            # WHOIS servers are not guaranteed to answer in the locale's
            # encoding; replace undecodable bytes instead of raising
            # UnicodeDecodeError.
            errors="replace",
            timeout=timeout_s,
        )
    except FileNotFoundError:
        return {
            "status": "error",
            "error": "whois_lookup: binario 'whois' no encontrado (instala con `apt install whois`)",
            "target": target,
        }
    except subprocess.TimeoutExpired:
        return {
            "status": "error",
            "error": f"whois_lookup: timeout tras {timeout_s}s consultando '{target}'",
            "target": target,
        }
    except (OSError, ValueError) as e:  # pragma: no cover - rare OS / argument errors
        # ValueError covers arguments the OS cannot accept, such as an
        # embedded NUL byte in ``target``.
        return {"status": "error", "error": f"whois_lookup: {e}", "target": target}

    raw = proc.stdout or ""
    # whois commonly prints to stdout even with a non-zero return code, so it
    # is only a hard error when there was no usable output at all.
    if not raw.strip():
        err = (proc.stderr or "").strip() or f"whois devolvio salida vacia (rc={proc.returncode})"
        return {"status": "error", "error": f"whois_lookup: {err}", "target": target}

    return parse_whois_raw(raw, target)
|
|
|
|
|
|
if __name__ == "__main__":
    # Smoke test: the core assertions do NOT depend on the network; they parse
    # a hardcoded whois sample. Afterwards a real query is attempted, and a
    # network or whois failure there is tolerated.
    SAMPLE = """\
Domain Name: GOOGLE.COM
Registrar: MarkMonitor Inc.
Registrant Country: US
Creation Date: 1997-09-15T04:00:00Z
Registry Expiry Date: 2028-09-14T04:00:00Z
Updated Date: 2019-09-09T15:39:04Z
Name Server: NS1.GOOGLE.COM
Name Server: NS2.GOOGLE.COM
"""
    parsed = parse_whois_raw(SAMPLE, "google.com")
    assert parsed["status"] == "ok", parsed

    # Expected value per parsed field, checked in this order.
    expected_fields = {
        "registrar": "MarkMonitor Inc.",
        "registrant_country": "US",
        "creation_date": "1997-09-15T04:00:00Z",
        "expiry_date": "2028-09-14T04:00:00Z",
        "updated_date": "2019-09-09T15:39:04Z",
        "name_servers": ["ns1.google.com", "ns2.google.com"],
    }
    for field, expected in expected_fields.items():
        assert parsed[field] == expected, parsed[field]
    assert parsed["raw"] == SAMPLE
    print("smoke parse OK")

    # Real query, best-effort: it must not break the smoke test when the
    # network is unavailable.
    live = whois_lookup("google.com")
    live_status = live["status"]
    print("live status:", live_status)
    if live_status != "ok":
        print(" (red no disponible o whois fallo, tolerado):", live.get("error"))
    else:
        print(" registrar:", live.get("registrar"))
        print(" name_servers:", live.get("name_servers"))
|