feat(recon): grupo de reconocimiento de red + servicios + fingerprint web

Añade el capability group `recon` (dominio cybersecurity + pipelines, Python),
con la política de archivado OSINT y página madre docs/capabilities/recon.md.

Lookups y sondeo (wrappers de CLI):
- whois_lookup, rdap_lookup, dns_records, ping_host, traceroute_host, nmap_scan
- save_scan_to_osint (sink común) + recon_osint (pipeline one-shot scan+archivado)

Escaneo de puertos/servicios nativo (stdlib, sin nmap ni sudo):
- scan_tcp_ports: connect-scan TCP concurrente (open/closed/filtered)
- grab_service_banner: banner grab + identificación de servicio/versión real
- identify_port_service: puro, puerto -> servicio IANA esperado (~120 puertos)
- scan_port_services: pipeline one-shot (scan -> identify + banner por puerto abierto)

Fingerprint de tecnología web (estilo Wappalyzer), patrón pura/impura:
- fetch_http_fingerprint: GET stdlib, recoge headers/html/cookies (solo nombres)
- detect_web_tech: puro, matchea ~50 firmas regex -> tecnologías por categoría
- fingerprint_web_stack: pipeline one-shot url -> tecnologías

Todas las funciones devuelven un dict con la clave `status` y nunca lanzan excepciones. Tests: 43 en verde, sin acceso a red externa.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-14 15:12:07 +02:00
parent d89da1292d
commit 935008ec3f
49 changed files with 6659 additions and 302 deletions
@@ -1,109 +1,59 @@
"""Tests para whois_lookup."""
"""Tests para whois_lookup (CLI `whois`, estilo dict sin excepciones)."""
import os
import sys
sys.path.insert(0, os.path.dirname(__file__))
import whois_lookup as wl
from whois_lookup import whois_lookup
from whois_lookup import parse_whois_raw, whois_lookup
# Minimal WHOIS text fixture fed to parse_whois_raw by the tests: it carries a
# registrar, a registrant country, three date fields and two upper-case name
# servers.
SAMPLE = """\
Domain Name: GOOGLE.COM
Registrar: MarkMonitor Inc.
Registrant Country: US
Creation Date: 1997-09-15T04:00:00Z
Registry Expiry Date: 2028-09-14T04:00:00Z
Updated Date: 2019-09-09T15:39:04Z
Name Server: NS1.GOOGLE.COM
Name Server: NS2.GOOGLE.COM
"""
def _rdap_sample() -> dict:
    """Build a representative RDAP-style domain payload for the tests.

    A new dict (with new nested lists) is created on every call, so a test
    can mutate the result without leaking state into other tests.

    Returns:
        A dict with the keys ``ldhName``, ``status``, ``events``,
        ``nameservers`` and ``entities``.
    """
    lifecycle_events = [
        {"eventAction": action, "eventDate": stamp}
        for action, stamp in (
            ("registration", "2020-01-15T10:00:00Z"),
            ("expiration", "2027-01-15T10:00:00Z"),
            ("last changed", "2026-01-10T08:30:00Z"),
        )
    ]
    # The second host is deliberately upper-case in the fixture.
    name_servers = [
        {"ldhName": host} for host in ("ns1.example.net", "NS2.EXAMPLE.NET")
    ]
    registrar_vcard = [
        "vcard",
        [
            ["version", {}, "text", "4.0"],
            ["fn", {}, "text", "Example Registrar Inc."],
        ],
    ]
    registrar = {
        "handle": "REG-123",
        "roles": ["registrar"],
        "vcardArray": registrar_vcard,
    }
    # The registrant entity carries no vCard at all.
    registrant = {"handle": "REGISTRANT-9", "roles": ["registrant"]}
    return {
        "ldhName": "organic-machine.com",
        "status": ["client transfer prohibited"],
        "events": lifecycle_events,
        "nameservers": name_servers,
        "entities": [registrar, registrant],
    }
def test_parsea_campos_comunes():
    """Parse the full WHOIS sample and check every extracted field.

    Covers status, target, registrar, registrant country, the three dates,
    the name servers (expected lower-cased) and the untouched raw text.
    """
    outcome = parse_whois_raw(SAMPLE, "google.com")
    expected_fields = {
        "status": "ok",
        "target": "google.com",
        "registrar": "MarkMonitor Inc.",
        "registrant_country": "US",
        "creation_date": "1997-09-15T04:00:00Z",
        "expiry_date": "2028-09-14T04:00:00Z",
        "updated_date": "2019-09-09T15:39:04Z",
        "name_servers": ["ns1.google.com", "ns2.google.com"],
        "raw": SAMPLE,
    }
    # Dict order mirrors the original assertion order, so the first failing
    # field is reported first.
    for field, expected in expected_fields.items():
        assert outcome[field] == expected
def test_normaliza_respuesta_rdap(monkeypatch):
    """Normalize a full RDAP payload returned by the patched HTTP helper.

    ``wl.http_get_json`` is replaced so no network access happens; the test
    then checks registrar, dates, name servers, status, entities and that the
    raw payload is passed through.
    """
    monkeypatch.setattr(wl, "http_get_json", lambda url, timeout=15.0: _rdap_sample())

    result = whois_lookup("organic-machine.com")

    assert result["found"] is True
    assert result["registrar"] == "Example Registrar Inc."
    assert result["creation_date"] == "2020-01-15T10:00:00Z"
    assert result["expiration_date"] == "2027-01-15T10:00:00Z"
    assert result["last_changed"] == "2026-01-10T08:30:00Z"
    # Name servers are expected lower-cased even though the fixture has one
    # host in upper case.
    assert result["nameservers"] == ["ns1.example.net", "ns2.example.net"]
    assert result["status"] == ["client transfer prohibited"]
    assert {"handle": "REGISTRANT-9", "roles": ["registrant"]} in result["entities"]
    assert result["raw"]["ldhName"] == "organic-machine.com"


def test_campos_ausentes_quedan_none():
    """A minimal raw WHOIS text leaves optional fields as None / empty list."""
    parsed = parse_whois_raw("Domain Name: x.com\n", "x.com")

    assert parsed["status"] == "ok"
    assert parsed["registrar"] is None
    assert parsed["creation_date"] is None
    assert parsed["expiry_date"] is None
    assert parsed["name_servers"] == []
def test_dominio_no_encontrado_404(monkeypatch):
    """An HTTP 404 raised by the HTTP helper yields exactly ``{'found': False}``."""

    def raise_not_found(url, timeout=15.0):
        # Stand-in for wl.http_get_json that always fails with a 404 message.
        raise RuntimeError("http_get_json: HTTP 404 at 'rdap.org' — not found")

    monkeypatch.setattr(wl, "http_get_json", raise_not_found)

    assert whois_lookup("nope-no-existe-xyz.invalid") == {"found": False}
def test_raw_siempre_presente():
    """The ``raw`` field echoes the input text unchanged."""
    noise = "Random: noise\n"
    assert parse_whois_raw(noise, "noise.test")["raw"] == noise
def test_otro_error_http_se_propaga(monkeypatch):
    """An HTTP error other than 404 propagates to the caller as RuntimeError."""

    def fake(url, timeout=15.0):
        # Stand-in for wl.http_get_json that always fails with a 500 message.
        raise RuntimeError("http_get_json: HTTP 500 at 'rdap.org' — boom")

    monkeypatch.setattr(wl, "http_get_json", fake)

    try:
        whois_lookup("organic-machine.com")
    except RuntimeError as exc:
        assert "HTTP 500" in str(exc)
    else:
        # Raised explicitly rather than via `assert False`, which is removed
        # when Python runs with -O and would let the test pass silently.
        raise AssertionError("deberia haberse propagado el error 500")
def test_sin_registrar_ni_fechas(monkeypatch):
    """A minimal RDAP payload leaves optional fields as None / empty lists."""

    def minimal_rdap(url, timeout=15.0):
        # Stand-in for wl.http_get_json returning only the domain name.
        return {"ldhName": "x.com"}

    monkeypatch.setattr(wl, "http_get_json", minimal_rdap)
    outcome = whois_lookup("x.com")

    assert outcome["found"] is True
    for scalar_field in ("registrar", "creation_date"):
        assert outcome[scalar_field] is None
    for list_field in ("nameservers", "status", "entities"):
        assert outcome[list_field] == []
def test_dominio_vacio_lanza_error():
    """An empty domain must make ``whois_lookup`` raise RuntimeError."""
    try:
        whois_lookup("")
    except RuntimeError:
        return
    # Raised explicitly rather than via `assert False`: under `python -O`
    # asserts are stripped, which would make this test pass even when no
    # exception was raised.
    raise AssertionError("deberia haber lanzado RuntimeError")
def test_target_vacio_devuelve_error():
    """An empty target returns an error-status dict and does not raise."""
    outcome = whois_lookup("")

    assert outcome["status"] == "error"
    # The error message is expected to mention that the target is empty.
    assert "vacio" in outcome["error"]