Files
osint_web/server/test_main.py
T
agent 6af9a56c28 feat: initial scaffold of osint_web — backend Python sobre el grupo obsidian
Fase 5b del issue 0172. Backend stdlib http (solo 127.0.0.1) que orquesta
las funciones del grupo obsidian del fn_registry para servir el vault OSINT:
grafo agregado (/api/graph), tablas por tipo (/api/nodes), fichas con
attachments (/api/node, /api/attachment con bloqueo de path traversal) y
búsqueda (/api/search). Caché en memoria con POST /api/refresh.

Tests pytest (10) sobre vault fixture: grafo golden, tipo filtrado, ficha
con attachments, wikilink dangling, slug con acentos, traversal bloqueado,
vault inexistente (exit 2) y e2e HTTP en puerto efímero. Frontend (React +
Vite + Mantine + sigma.js) queda para la fase siguiente.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-11 22:36:07 +02:00

242 lines
8.0 KiB
Python

"""Tests del backend osint_web sobre un vault fixture efímero.
Cubre los escenarios del Definition of Done del issue 0172 que aplican al
backend: grafo golden, tabla filtrada por tipo, ficha con attachments,
wikilink dangling, slug con acentos, path traversal bloqueado y vault
inexistente con error claro. Incluye un test e2e que levanta el servidor en
un puerto efímero y golpea los endpoints reales por HTTP.
"""
import importlib.util
import json
import os
import subprocess
import sys
import threading
import urllib.error
import urllib.request
import pytest
# Directory holding this test module; main.py is loaded from the same place.
HERE = os.path.dirname(os.path.abspath(__file__))

# Load the backend straight from its file path, under the module name
# "osint_web_main", instead of importing it by package name.
_spec = importlib.util.spec_from_file_location(
    "osint_web_main", os.path.join(HERE, "main.py")
)
main = importlib.util.module_from_spec(_spec)

# Register the module before executing it, as the importlib recipe for
# importing a source file directly does. Code that looks its own module up
# through sys.modules (e.g. dataclasses with string annotations, pickle,
# typing.get_type_hints) may otherwise fail.
sys.modules[_spec.name] = main
try:
    _spec.loader.exec_module(main)
except BaseException:
    # Do not leave a half-initialised module registered if main.py fails.
    sys.modules.pop(_spec.name, None)
    raise
# --- fixture: vault mínimo con personas, organizaciones y attachments --------
@pytest.fixture()
def vault(tmp_path):
    """Build a throwaway Obsidian vault and return its root path as a string.

    Everything is created under ``tmp_path / "vault_osint"``:

    * ``.obsidian/app.json`` containing an empty JSON object.
    * ``personas/maria-del-mar-perez.md``: a person note that links to
      ``[[ACME SL]]`` and to ``[[Persona-Inexistente]]`` (no note with that
      name is created), and embeds ``dni-maria.jpg`` (created below) and
      ``certificado-perdido.pdf`` (never created).
    * ``organizaciones/acme-sl.md``: an organisation note that links back
      using the accented display name ``[[María del Mar Pérez]]``.
    * ``attachments/personas/maria-del-mar-perez/dni-maria.jpg``: the JPEG
      magic bytes followed by filler.
    """
    persona_note = (
        "---\n"
        "tipo: persona\n"
        "nombre: María del Mar Pérez\n"
        "dni: 12345678Z\n"
        "fecha_nacimiento: 1980-05-01\n"
        "tags: [objetivo]\n"
        "---\n"
        "\n"
        "Ficha de prueba.\n"
        "\n"
        "## Relaciones\n"
        "\n"
        "- [[ACME SL]]\n"
        "- [[Persona-Inexistente]]\n"
        "\n"
        "## Documentos\n"
        "\n"
        "![[dni-maria.jpg]]\n"
        "![[certificado-perdido.pdf]]\n"
    )
    organisation_note = (
        "---\n"
        "tipo: organizacion\n"
        "nombre: ACME SL\n"
        "cif: B00000000\n"
        "---\n"
        "\n"
        "## Relaciones\n"
        "\n"
        "- [[María del Mar Pérez]]\n"
    )

    vault_root = tmp_path / "vault_osint"

    # The .obsidian directory is created first, which also creates the root.
    config_dir = vault_root / ".obsidian"
    config_dir.mkdir(parents=True)
    config_dir.joinpath("app.json").write_text("{}", encoding="utf-8")

    # One folder per note type, each holding a single note.
    notes = (
        ("personas", "maria-del-mar-perez.md", persona_note),
        ("organizaciones", "acme-sl.md", organisation_note),
    )
    for folder_name, file_name, text in notes:
        folder = vault_root / folder_name
        folder.mkdir()
        folder.joinpath(file_name).write_text(text, encoding="utf-8")

    image_dir = vault_root.joinpath(
        "attachments", "personas", "maria-del-mar-perez"
    )
    image_dir.mkdir(parents=True)
    image_dir.joinpath("dni-maria.jpg").write_bytes(
        b"\xff\xd8\xff" + b"fakejpegdata"
    )

    return str(vault_root)
# --- VaultState: grafo, tablas, fichas ----------------------------------------
def test_graph_golden(vault):
    """Both real notes appear as graph nodes, joined by a "relacion" edge."""
    state = main.VaultState(vault)

    node_ids = set()
    for node in state.graph["nodes"]:
        node_ids.add(node["id"])
    assert node_ids.issuperset({"maria-del-mar-perez", "acme-sl"})

    # The link listed under "## Relaciones" must yield an edge with the
    # right kind and a resolved target.
    expected_edge = dict(
        source="maria-del-mar-perez",
        target="acme-sl",
        kind="relacion",
    )
    assert expected_edge in state.graph["edges"]
def test_wikilink_acentos_resuelve_por_slug(vault):
    """A wikilink with accents and capitals resolves to the slugged node id.

    ``[[María del Mar Pérez]]`` in the organisation note must produce an edge
    whose target is ``maria-del-mar-perez``.
    """
    state = main.VaultState(vault)
    expected_edge = dict(
        source="acme-sl",
        target="maria-del-mar-perez",
        kind="relacion",
    )
    assert expected_edge in state.graph["edges"]
def test_wikilink_dangling_genera_nodo_fantasma(vault):
    """A link to a missing note becomes a dangling graph node, not a table row."""
    state = main.VaultState(vault)

    ghost_ids = [
        node["id"] for node in state.graph["nodes"] if node.get("dangling")
    ]
    assert "persona-inexistente" in ghost_ids

    # Tables list real notes only, so the ghost must not appear there.
    table_ids = [row["id"] for row in state.rows_by_tipo("")]
    assert "persona-inexistente" not in table_ids
def test_rows_filtradas_por_tipo(vault):
    """Filtering rows by "organizacion" returns only the ACME note."""
    rows = main.VaultState(vault).rows_by_tipo("organizacion")

    row_ids = []
    for row in rows:
        row_ids.append(row["id"])
    assert row_ids == ["acme-sl"]

    first_row = rows[0]
    assert first_row["frontmatter"]["cif"] == "B00000000"
def test_node_detail_con_attachments(vault):
    """The person detail carries front matter, body text and both embeds."""
    detail = main.VaultState(vault).node_detail("maria-del-mar-perez")
    assert detail is not None
    assert detail["frontmatter"]["dni"] == "12345678Z"
    assert "Ficha de prueba" in detail["body"]

    # Index the attachments by file name for direct lookup.
    attachments = {}
    for entry in detail["attachments"]:
        attachments[entry["name"]] = entry

    image = attachments["dni-maria.jpg"]
    expected_path = os.path.join(
        "attachments", "personas", "maria-del-mar-perez", "dni-maria.jpg"
    )
    assert image["kind"] == "image"
    assert image["path"] == expected_path

    # An embed with no backing file is reported as missing, not a crash.
    assert attachments["certificado-perdido.pdf"]["kind"] == "missing"
def test_node_detail_desconocido(vault):
    """Asking for the detail of an unknown slug returns None."""
    missing = main.VaultState(vault).node_detail("no-existe-este-slug")
    assert missing is None
# --- seguridad: path traversal + vault inexistente ----------------------------
def test_attachment_path_traversal_bloqueado(vault):
    """Hostile or degenerate attachment paths resolve to None; a real one resolves."""
    state = main.VaultState(vault)

    rejected_paths = ("../../etc/passwd", "/etc/passwd", "", ".")
    for candidate in rejected_paths:
        assert state.resolve_attachment_path(candidate) is None

    # A legitimate path inside the vault must still resolve.
    resolved = state.resolve_attachment_path(
        "attachments/personas/maria-del-mar-perez/dni-maria.jpg"
    )
    assert resolved is not None
    assert resolved.endswith("dni-maria.jpg")
def test_vault_inexistente_error_claro():
    """Building a VaultState on a missing path raises a descriptive FileNotFoundError."""
    missing_vault = "/no/existe/este/vault"
    expectation = pytest.raises(FileNotFoundError, match="el vault no existe")
    with expectation:
        main.VaultState(missing_vault)
def test_cli_vault_inexistente_exit_2():
    """Running main.py with a missing vault exits with code 2 and explains why on stderr."""
    script = os.path.join(HERE, "main.py")
    command = [sys.executable, script, "--vault", "/no/existe"]

    result = subprocess.run(
        command, capture_output=True, text=True, timeout=30
    )

    assert result.returncode == 2
    assert "el vault no existe" in result.stderr
# --- e2e HTTP: server real en puerto efímero ----------------------------------
def _get(base, path):
    """GET ``base + path`` and return ``(status, content_type, body)``.

    HTTP error statuses are returned in the same tuple shape instead of being
    raised, so callers can assert on 403/404 responses directly. Any other
    failure (for example a connection error) still propagates.

    Args:
        base: URL prefix, concatenated with ``path`` as-is.
        path: Request path, including any query string.

    Returns:
        A tuple of the numeric status code, the ``Content-Type`` header
        (``""`` when absent) and the raw response body as bytes.
    """
    try:
        with urllib.request.urlopen(base + path, timeout=10) as resp:
            return resp.status, resp.headers.get("Content-Type", ""), resp.read()
    except urllib.error.HTTPError as err:
        # HTTPError wraps the still-open response; close it once it has been
        # read so the underlying connection is not leaked.
        try:
            return err.code, err.headers.get("Content-Type", ""), err.read()
        finally:
            err.close()
def test_http_endpoints(vault):
    """Exercise the real HTTP endpoints against a server on an ephemeral port.

    The server is built with ``main.make_server(vault, 0, quiet=True)`` and
    served from a daemon thread. The status code is always asserted before
    the body is decoded, so an error response fails with the offending
    status rather than a JSON decoding error.
    """
    server = main.make_server(vault, 0, quiet=True)
    port = server.server_address[1]
    base = f"http://127.0.0.1:{port}"
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()

    def get_json(path, expected_status=200):
        """GET ``path``, check the status code, then decode the JSON body."""
        status, _, body = _get(base, path)
        assert status == expected_status, f"GET {path} returned {status}"
        return json.loads(body)

    try:
        health = get_json("/api/health")
        assert health["status"] == "ok"
        assert health["nodes"] >= 2

        graph = get_json("/api/graph")
        assert len(graph["edges"]) >= 2

        rows = get_json("/api/nodes?tipo=persona")
        assert [r["id"] for r in rows] == ["maria-del-mar-perez"]

        detail = get_json("/api/node/maria-del-mar-perez")
        assert detail["label"] == "María del Mar Pérez"
        # The front matter date must reach the client as an ISO string
        # (original note: PyYAML parses it as datetime.date).
        assert detail["frontmatter"]["fecha_nacimiento"] == "1980-05-01"

        status, ctype, body = _get(
            base,
            "/api/attachment?path=attachments/personas/maria-del-mar-perez/dni-maria.jpg",
        )
        assert status == 200
        assert ctype.startswith("image/")
        assert body[:3] == b"\xff\xd8\xff"

        # Error path: a traversal attempt must never be served.
        status, _, _ = _get(base, "/api/attachment?path=../../etc/passwd")
        assert status == 403

        hits = get_json("/api/search?q=ACME")
        assert any(h["id"] == "acme-sl" for h in hits)

        status, _, _ = _get(base, "/api/node/slug-fantasma")
        assert status == 404

        # POST /api/refresh rebuilds the cache.
        req = urllib.request.Request(base + "/api/refresh", method="POST")
        with urllib.request.urlopen(req, timeout=10) as resp:
            refresh_status = resp.status
            refreshed = json.loads(resp.read())
        assert refresh_status == 200
        assert refreshed["status"] == "refreshed"
    finally:
        server.shutdown()
        server.server_close()
        # Wait for the serving thread to finish so it does not outlive the test.
        thread.join(timeout=5)