feat(scans): persistencia de escaneos de red + POST /api/scan

Tabla network_scans (migración 005, schema main, lleva note_path) que otras
herramientas pueblan vía HTTP con escaneos de reconocimiento (whois/rdap/dns/
nmap/traceroute/ping). Endpoint POST /api/scan: id determinista
<target_slug>:<scan_type>:<YYYYMMDD-HHMM> derivado de scan_ts, idempotente por
id (duckdb_upsert ON CONFLICT DO UPDATE) bajo el lock single-writer del service.
summary (dict) se serializa a JSON.

network_scans no se deriva de notas: ni ingest_vault ni ingest_dav la tocan, así
que un re-ingest del vault no la trunca (test lo verifica).

Tests: inserción + id derivado, idempotencia mismo-minuto, validación de campos
requeridos (422), y no-truncado por ingest del vault.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-14 13:13:36 +02:00
parent 9677903ca6
commit 3063d3c44f
5 changed files with 277 additions and 1 deletions
+99
View File
@@ -845,3 +845,102 @@ def test_push_all_dav_bulk_flujo_mockeado(client, cfg, monkeypatch):
by_uid = {row["uid"]: row["etag"] for row in rows}
assert by_uid["c-a"] == '"etag-a"'
assert by_uid["c-b"] == '"etag-b"'
# --- F6: escaneos de red (POST /api/scan) ----------------------------------
def test_api_scan_inserta_fila_y_id_derivado(client):
    """POST /api/scan stores a network_scans row with id <slug>:<type>:<YYYYMMDD-HHMM>."""
    expected_id = "example.com:whois:20260614-1200"
    payload = dict(
        target="example.com",
        target_slug="example.com",
        scan_type="whois",
        tool="whois",
        note_path="dominios/example.com/recon/whois-20260614-1200.md",
        summary={"registrar": "X"},
        scan_ts="2026-06-14T12:00:00",
    )

    response = client.post("/api/scan", json=payload)
    assert response.status_code == 200
    answer = response.json()
    assert answer["status"] == "ok"
    # The id is built from target_slug, scan_type and scan_ts cut to the minute.
    assert answer["id"] == expected_id
    assert answer["inserted"] == 1

    # Read the row back through the query endpoint to check what was persisted.
    sql = (
        "SELECT id, target, scan_type, tool, note_path, summary "
        "FROM network_scans WHERE id = 'example.com:whois:20260614-1200'"
    )
    lookup = client.post("/api/query", json={"sql": sql}).json()
    assert lookup["status"] == "ok"
    stored = lookup["rows"][0]
    for column in ("target", "scan_type", "tool"):
        assert stored[column] == payload[column]
    assert stored["note_path"].endswith("whois-20260614-1200.md")
    # The summary dict is stored as JSON text, so decode it before comparing.
    assert json.loads(stored["summary"]) == {"registrar": "X"}
def test_api_scan_idempotente_mismo_minuto(client):
    """Two scans of the same target/type within one minute collapse to one id (upsert)."""
    common = {
        "target": "8.8.8.8",
        "target_slug": "8.8.8.8",
        "scan_type": "ping",
        "tool": "ping",
        "note_path": "dominios/8.8.8.8/recon/ping.md",
        "scan_ts": "2026-06-14T09:30:15",
    }
    first_payload = dict(common, summary={"loss": "0%"})
    # Same minute (09:30) but different seconds and a different summary.
    second_payload = dict(
        common, scan_ts="2026-06-14T09:30:55", summary={"loss": "10%"}
    )

    first = client.post("/api/scan", json=first_payload).json()
    assert first["status"] == "ok"
    assert first["inserted"] == 1

    # Same minute means same id, so this is reported as an update, not a new row.
    second = client.post("/api/scan", json=second_payload).json()
    assert second["status"] == "ok"
    assert second["id"] == first["id"]
    assert first["id"] == "8.8.8.8:ping:20260614-0930"
    assert second["updated"] == 1

    count_sql = "SELECT COUNT(*) AS n FROM network_scans WHERE target_slug = '8.8.8.8'"
    counted = client.post("/api/query", json={"sql": count_sql}).json()
    # Exactly one row despite the two POSTs.
    assert counted["rows"][0]["n"] == 1
def test_api_scan_valida_campos_requeridos(client):
    """A body that omits the required note_path field is rejected with HTTP 422."""
    # note_path is missing; per the original note this 422 comes from
    # FastAPI/Pydantic request validation.
    incomplete = {"target": "x.com", "target_slug": "x.com", "scan_type": "dns"}
    response = client.post("/api/scan", json=incomplete)
    assert response.status_code == 422
def test_api_scan_no_lo_borra_el_ingest_vault(client):
    """Re-ingesting the vault must NOT truncate network_scans.

    network_scans is not derived from notes, so a vault ingest is expected to
    leave it alone. The test stores one scan, confirms it is present, runs the
    vault ingest again and confirms the row is still there.
    """
    count_query = {"sql": "SELECT COUNT(*) AS n FROM network_scans"}

    client.post("/api/ingest/vault")
    scan = client.post(
        "/api/scan",
        json={
            "target": "acme.com",
            "target_slug": "acme.com",
            "scan_type": "dns",
            "note_path": "dominios/acme.com/recon/dns.md",
            "summary": {"a": ["1.2.3.4"]},
            "scan_ts": "2026-06-14T08:00:00",
        },
    ).json()
    # Fail here if the insert itself was rejected; otherwise the final count
    # would fail and be misread as a truncation by the ingest.
    assert scan["status"] == "ok"

    # Baseline: the row exists before the re-ingest runs.
    before = client.post("/api/query", json=count_query).json()
    assert before["rows"][0]["n"] == 1

    # Re-ingesting the vault (rebuilds notes + mirrored entities) must not
    # touch network_scans.
    client.post("/api/ingest/vault")
    after = client.post("/api/query", json=count_query).json()
    assert after["rows"][0]["n"] == 1