feat(recon): grupo de reconocimiento de red + servicios + fingerprint web

Añade el capability group `recon` (dominio cybersecurity + pipelines, Python),
con la política de archivado OSINT y página madre docs/capabilities/recon.md.

Lookups y sondeo (wrappers de CLI):
- whois_lookup, rdap_lookup, dns_records, ping_host, traceroute_host, nmap_scan
- save_scan_to_osint (sink común) + recon_osint (pipeline one-shot scan+archivado)

Escaneo de puertos/servicios nativo (stdlib, sin nmap ni sudo):
- scan_tcp_ports: connect-scan TCP concurrente (open/closed/filtered)
- grab_service_banner: banner grab + identificación de servicio/versión real
- identify_port_service: puro, puerto -> servicio IANA esperado (~120 puertos)
- scan_port_services: pipeline one-shot (scan -> identify + banner por puerto abierto)

Fingerprint de tecnología web (estilo Wappalyzer), patrón pura/impura:
- fetch_http_fingerprint: GET stdlib, recoge headers/html/cookies (solo nombres)
- detect_web_tech: puro, matchea ~50 firmas regex -> tecnologías por categoría
- fingerprint_web_stack: pipeline one-shot url -> tecnologías

Todas devuelven un dict con la clave `status` en lugar de lanzar excepciones. Tests: 43 en verde, sin red externa.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-14 15:12:07 +02:00
parent d89da1292d
commit 935008ec3f
49 changed files with 6659 additions and 302 deletions
@@ -0,0 +1,141 @@
"""Tests para el pipeline recon_osint — SIN red ni service real.
Las funciones de escaneo (whois_lookup, nmap_scan, ...) y el sink
save_scan_to_osint se importan en el namespace del modulo del pipeline con
``from cybersecurity import (...)``. Para aislarlos de la red/disco los
parcheamos sobre los globals del propio modulo via importlib + monkeypatch.
Los tests usan kwargs minimos (target + scan_type + save) a proposito: la firma
del pipeline puede ampliarse en paralelo (p.ej. con un parametro ``confirm``)
sin que estos tests dejen de pasar.
"""
import importlib
import os
import sys
# Prepend the directory two levels above this file to sys.path; presumably this
# is what lets ``pipelines.recon_osint`` be imported below (confirm against the
# repository layout).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
# Globals of the pipeline module (where whois_lookup, save_scan_to_osint... live).
# The tests patch names on this module object rather than on their source package.
mod = importlib.import_module("pipelines.recon_osint")
# Entry point under test, bound once so every test calls the same object.
recon_osint = mod.recon_osint
def test_golden_whois_save_true_invoca_scan_y_sink(monkeypatch):
    """Golden path: scan_type='whois' with save=True.

    The pipeline must run whois_lookup and hand the scan's raw output to
    save_scan_to_osint, returning both the scan dict and the sink's result.
    """
    whois_result = dict(
        status="ok",
        target="example.com",
        registrar="Acme Registrar",
        expiry_date="2028-09-14T04:00:00Z",
        raw="Domain Name: EXAMPLE.COM\nRegistrar: Acme Registrar\n",
    )
    # Everything the stubs observe is recorded here for later inspection.
    recorded = {}

    def stub_whois(target, **kwargs):
        recorded["whois_target"] = target
        return whois_result

    def stub_sink(target, scan_type, raw, **kwargs):
        recorded["save"] = dict(
            target=target,
            scan_type=scan_type,
            raw=raw,
            kwargs=kwargs,
        )
        return dict(
            status="ok",
            note_path="dominios/example.com/recon/whois-20260614-1200.md",
            registered=True,
            scan_id="scan-1",
        )

    monkeypatch.setattr(mod, "whois_lookup", stub_whois)
    monkeypatch.setattr(mod, "save_scan_to_osint", stub_sink)

    outcome = recon_osint("example.com", scan_type="whois", save=True)

    assert outcome["status"] == "ok"
    assert outcome["scan_type"] == "whois"
    assert outcome["target"] == "example.com"
    # The raw scan dict is returned unchanged.
    assert outcome["scan"] == whois_result
    # The archiving data produced by the sink is returned as well.
    archived = outcome["osint"]
    assert archived["registered"] is True
    assert archived["scan_id"] == "scan-1"
    # whois_lookup received the target.
    assert recorded["whois_target"] == "example.com"
    # save_scan_to_osint was invoked with the scan's raw text.
    assert "save" in recorded
    sink_call = recorded["save"]
    assert sink_call["raw"] == whois_result["raw"]
    assert sink_call["target"] == "example.com"
    assert sink_call["scan_type"] == "whois"
def test_save_false_ejecuta_scan_sin_archivar(monkeypatch):
    """With save=False the scan runs but save_scan_to_osint is never called."""
    whois_result = dict(
        status="ok",
        target="example.com",
        registrar="Acme",
        raw="Domain Name: EXAMPLE.COM\n",
    )
    # One entry is appended per sink invocation; it must stay empty.
    sink_calls = []

    def stub_whois(target, **kwargs):
        return whois_result

    def stub_sink(*args, **kwargs):  # pragma: no cover - must never be called
        sink_calls.append((args, kwargs))
        return {"status": "ok"}

    monkeypatch.setattr(mod, "whois_lookup", stub_whois)
    monkeypatch.setattr(mod, "save_scan_to_osint", stub_sink)

    outcome = recon_osint("example.com", scan_type="whois", save=False)

    assert outcome["status"] == "ok"
    assert outcome["scan"] == whois_result
    # No archiving: osint is None and the sink was never invoked.
    assert outcome["osint"] is None
    assert len(sink_calls) == 0
def test_scan_type_invalido_error_sin_red(monkeypatch):
    """An unknown scan_type yields an error without calling any scan or sink."""

    # Sentinel that blows up if invoked: the pipeline must not touch anything.
    def explode(*args, **kwargs):  # pragma: no cover - must never be called
        raise AssertionError("no debe ejecutarse scan ni sink con scan_type invalido")

    for attr_name in ("whois_lookup", "nmap_scan", "save_scan_to_osint"):
        monkeypatch.setattr(mod, attr_name, explode)

    outcome = recon_osint("example.com", scan_type="bogus", save=True)

    assert outcome["status"] == "error"
    assert outcome["stage"] == "validate"
    assert outcome["scan_type"] == "bogus"
    # The error payload must list the accepted scan types.
    assert "valid" in outcome
    assert isinstance(outcome["valid"], list)
def test_scan_fallido_no_intenta_archivar(monkeypatch):
    """If the scan returns an error status, the sink is not called."""
    # One entry is appended per sink invocation; it must stay empty.
    sink_calls = []

    def failing_whois(target, **kwargs):
        return {"status": "error", "error": "timeout"}

    def stub_sink(*args, **kwargs):  # pragma: no cover - must never be called
        sink_calls.append((args, kwargs))
        return {}

    monkeypatch.setattr(mod, "whois_lookup", failing_whois)
    monkeypatch.setattr(mod, "save_scan_to_osint", stub_sink)

    outcome = recon_osint("example.com", scan_type="whois", save=True)

    assert outcome["status"] == "error"
    assert outcome["stage"] == "scan"
    assert len(sink_calls) == 0