6af9a56c28
Fase 5b del issue 0172. Backend stdlib http (solo 127.0.0.1) que orquesta las funciones del grupo obsidian del fn_registry para servir el vault OSINT: grafo agregado (/api/graph), tablas por tipo (/api/nodes), fichas con attachments (/api/node, /api/attachment con bloqueo de path traversal) y búsqueda (/api/search). Caché en memoria con POST /api/refresh. Tests pytest (10) sobre vault fixture: grafo golden, tipo filtrado, ficha con attachments, wikilink dangling, slug con acentos, traversal bloqueado, vault inexistente (exit 2) y e2e HTTP en puerto efímero. Frontend (React + Vite + Mantine + sigma.js) queda para la fase siguiente. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
242 lines
8.0 KiB
Python
242 lines
8.0 KiB
Python
"""Tests del backend osint_web sobre un vault fixture efímero.
|
|
|
|
Cubre los escenarios del Definition of Done del issue 0172 que aplican al
|
|
backend: grafo golden, tabla filtrada por tipo, ficha con attachments,
|
|
wikilink dangling, slug con acentos, path traversal bloqueado y vault
|
|
inexistente con error claro. Incluye un test e2e que levanta el servidor en
|
|
un puerto efímero y golpea los endpoints reales por HTTP.
|
|
"""
|
|
|
|
import importlib.util
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import threading
|
|
import urllib.error
|
|
import urllib.request
|
|
|
|
import pytest
|
|
|
|
# Directory that holds this test module; main.py is looked up next to it.
HERE = os.path.dirname(os.path.abspath(__file__))

# Load the backend straight from its file path under the module name
# "osint_web_main" instead of going through a regular ``import``.
_spec = importlib.util.spec_from_file_location(
    "osint_web_main", os.path.join(HERE, "main.py")
)
main = importlib.util.module_from_spec(_spec)
# Register the module before executing it, as the importlib recipe for
# importing a source file directly does, so that anything resolving the module
# by name through sys.modules can find it.
sys.modules[_spec.name] = main
_spec.loader.exec_module(main)
|
|
|
|
|
|
# --- fixture: vault mínimo con personas, organizaciones y attachments --------
|
|
|
|
|
|
@pytest.fixture()
def vault(tmp_path):
    """Build a throwaway Obsidian vault and return its root path as a string.

    The vault lives at ``tmp_path / "vault_osint"`` and contains:

    * ``.obsidian/app.json`` holding ``{}``;
    * ``personas/maria-del-mar-perez.md``, which links to ``[[ACME SL]]`` and
      to ``[[Persona-Inexistente]]`` (no note is written for the latter) and
      embeds ``dni-maria.jpg`` and ``certificado-perdido.pdf``;
    * ``organizaciones/acme-sl.md``, which links back to
      ``[[María del Mar Pérez]]``;
    * ``attachments/personas/maria-del-mar-perez/dni-maria.jpg``; the embedded
      PDF is deliberately never created.
    """
    vault_root = tmp_path / "vault_osint"

    # Creating .obsidian with parents=True also creates the vault root itself.
    obsidian_cfg = vault_root / ".obsidian"
    obsidian_cfg.mkdir(parents=True)
    (obsidian_cfg / "app.json").write_text("{}", encoding="utf-8")

    persona_note = "".join(
        (
            "---\n",
            "tipo: persona\n",
            "nombre: María del Mar Pérez\n",
            "dni: 12345678Z\n",
            "fecha_nacimiento: 1980-05-01\n",
            "tags: [objetivo]\n",
            "---\n",
            "\n",
            "Ficha de prueba.\n",
            "\n",
            "## Relaciones\n",
            "\n",
            "- [[ACME SL]]\n",
            "- [[Persona-Inexistente]]\n",
            "\n",
            "## Documentos\n",
            "\n",
            "![[dni-maria.jpg]]\n",
            "![[certificado-perdido.pdf]]\n",
        )
    )
    personas = vault_root / "personas"
    personas.mkdir()
    (personas / "maria-del-mar-perez.md").write_text(persona_note, encoding="utf-8")

    org_note = "".join(
        (
            "---\n",
            "tipo: organizacion\n",
            "nombre: ACME SL\n",
            "cif: B00000000\n",
            "---\n",
            "\n",
            "## Relaciones\n",
            "\n",
            "- [[María del Mar Pérez]]\n",
        )
    )
    organizaciones = vault_root / "organizaciones"
    organizaciones.mkdir()
    (organizaciones / "acme-sl.md").write_text(org_note, encoding="utf-8")

    # Only the JPEG embed gets a backing file: three magic bytes plus filler.
    persona_attachments = (
        vault_root / "attachments" / "personas" / "maria-del-mar-perez"
    )
    persona_attachments.mkdir(parents=True)
    jpeg_bytes = b"\xff\xd8\xff" + b"fakejpegdata"
    (persona_attachments / "dni-maria.jpg").write_bytes(jpeg_bytes)

    return str(vault_root)
|
|
|
|
|
|
# --- VaultState: grafo, tablas, fichas ----------------------------------------
|
|
|
|
|
|
def test_graph_golden(vault):
    """The graph holds both real notes and the persona → organisation edge."""
    state = main.VaultState(vault)

    node_ids = {node["id"] for node in state.graph["nodes"]}
    assert node_ids >= {"maria-del-mar-perez", "acme-sl"}

    # Edge taken from the "## Relaciones" section: right kind, resolved target.
    expected_edge = {
        "source": "maria-del-mar-perez",
        "target": "acme-sl",
        "kind": "relacion",
    }
    assert expected_edge in state.graph["edges"]
|
|
|
|
|
|
def test_wikilink_acentos_resuelve_por_slug(vault):
    """[[María del Mar Pérez]] (accents, capitals) → maria-del-mar-perez.md."""
    state = main.VaultState(vault)
    back_edge = {
        "source": "acme-sl",
        "target": "maria-del-mar-perez",
        "kind": "relacion",
    }
    assert back_edge in state.graph["edges"]
|
|
|
|
|
|
def test_wikilink_dangling_genera_nodo_fantasma(vault):
    """A wikilink with no backing note becomes a node flagged ``dangling``."""
    state = main.VaultState(vault)

    ghost_ids = {
        node["id"] for node in state.graph["nodes"] if node.get("dangling")
    }
    assert "persona-inexistente" in ghost_ids

    # And it does not show up in the tables (real nodes only).
    table_rows = state.rows_by_tipo("")
    assert not any(row["id"] == "persona-inexistente" for row in table_rows)
|
|
|
|
|
|
def test_rows_filtradas_por_tipo(vault):
    """Filtering rows by ``organizacion`` yields only the ACME SL note."""
    org_rows = main.VaultState(vault).rows_by_tipo("organizacion")

    row_ids = [row["id"] for row in org_rows]
    assert row_ids == ["acme-sl"]

    first_row = org_rows[0]
    assert first_row["frontmatter"]["cif"] == "B00000000"
|
|
|
|
|
|
def test_node_detail_con_attachments(vault):
    """The persona detail exposes frontmatter, body and both embedded files."""
    ficha = main.VaultState(vault).node_detail("maria-del-mar-perez")
    assert ficha is not None
    assert ficha["frontmatter"]["dni"] == "12345678Z"
    assert "Ficha de prueba" in ficha["body"]

    attachments = {item["name"]: item for item in ficha["attachments"]}

    dni_scan = attachments["dni-maria.jpg"]
    expected_path = os.path.join(
        "attachments", "personas", "maria-del-mar-perez", "dni-maria.jpg"
    )
    assert dni_scan["kind"] == "image"
    assert dni_scan["path"] == expected_path

    # An embed that does not resolve to a file is marked missing, no crash.
    lost_pdf = attachments["certificado-perdido.pdf"]
    assert lost_pdf["kind"] == "missing"
|
|
|
|
|
|
def test_node_detail_desconocido(vault):
    """Asking for a slug that is not in the vault returns ``None``."""
    missing = main.VaultState(vault).node_detail("no-existe-este-slug")
    assert missing is None
|
|
|
|
|
|
# --- seguridad: path traversal + vault inexistente ----------------------------
|
|
|
|
|
|
def test_attachment_path_traversal_bloqueado(vault):
    """Traversal, absolute, empty and ``.`` paths are rejected; a real one resolves."""
    state = main.VaultState(vault)

    for rejected in ("../../etc/passwd", "/etc/passwd", "", "."):
        assert state.resolve_attachment_path(rejected) is None

    # A legitimate path inside the vault does resolve.
    legit = "attachments/personas/maria-del-mar-perez/dni-maria.jpg"
    resolved = state.resolve_attachment_path(legit)
    assert resolved is not None and resolved.endswith("dni-maria.jpg")
|
|
|
|
|
|
def test_vault_inexistente_error_claro():
    """Building a VaultState on a missing path raises a clear FileNotFoundError."""
    missing_vault = "/no/existe/este/vault"
    with pytest.raises(FileNotFoundError, match="el vault no existe"):
        main.VaultState(missing_vault)
|
|
|
|
|
|
def test_cli_vault_inexistente_exit_2():
    """Running main.py as a script with a missing vault exits 2 and explains why."""
    command = [sys.executable, os.path.join(HERE, "main.py"), "--vault", "/no/existe"]
    result = subprocess.run(command, capture_output=True, text=True, timeout=30)

    assert result.returncode == 2
    assert "el vault no existe" in result.stderr
|
|
|
|
|
|
# --- e2e HTTP: server real en puerto efímero ----------------------------------
|
|
|
|
|
|
def _get(base, path):
    """GET ``base + path`` and return ``(status, content_type, body)``.

    HTTP error responses, which urllib raises as ``HTTPError``, are returned
    in the same three-tuple shape instead of being raised, so callers can
    assert on error status codes. Any other exception propagates.

    Args:
        base: URL prefix, e.g. ``"http://127.0.0.1:8000"``.
        path: Path (and optional query string) appended verbatim to ``base``.

    Returns:
        A tuple of the status code, the ``Content-Type`` header (``""`` when
        the header is absent) and the raw response body as bytes.
    """
    try:
        with urllib.request.urlopen(base + path, timeout=10) as resp:
            return resp.status, resp.headers.get("Content-Type", ""), resp.read()
    except urllib.error.HTTPError as err:
        # HTTPError is itself the response object; close it once the body has
        # been read so the underlying connection is not leaked.
        with err:
            return err.code, err.headers.get("Content-Type", ""), err.read()
|
|
|
|
|
|
def test_http_endpoints(vault):
    """Exercise the real HTTP endpoints against a server running in a thread.

    The server is built with ``main.make_server(vault, 0, quiet=True)`` and the
    actual port is read back from ``server.server_address``. Each response's
    status is asserted before its body is decoded, and every condition has its
    own ``assert``, so a failing endpoint is reported as that status or field
    mismatch rather than as a JSON decoding error or an opaque compound
    assertion.
    """
    server = main.make_server(vault, 0, quiet=True)
    port = server.server_address[1]
    base = f"http://127.0.0.1:{port}"
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    # Started outside the ``try``: shutdown() waits for the serve_forever()
    # loop, so it must only run once that loop is active in another thread.
    thread.start()
    try:
        status, _, body = _get(base, "/api/health")
        assert status == 200
        health = json.loads(body)
        assert health["status"] == "ok"
        assert health["nodes"] >= 2

        status, _, body = _get(base, "/api/graph")
        assert status == 200
        graph = json.loads(body)
        assert len(graph["edges"]) >= 2

        status, _, body = _get(base, "/api/nodes?tipo=persona")
        assert status == 200
        rows = json.loads(body)
        assert [r["id"] for r in rows] == ["maria-del-mar-perez"]

        status, _, body = _get(base, "/api/node/maria-del-mar-perez")
        assert status == 200
        detail = json.loads(body)
        assert detail["label"] == "María del Mar Pérez"
        # PyYAML parses the date as datetime.date → it must serialize to ISO.
        assert detail["frontmatter"]["fecha_nacimiento"] == "1980-05-01"

        status, ctype, body = _get(
            base,
            "/api/attachment?path=attachments/personas/maria-del-mar-perez/dni-maria.jpg",
        )
        assert status == 200
        assert ctype.startswith("image/")
        assert body[:3] == b"\xff\xd8\xff"

        # DoD error path: traversal must never serve anything outside the vault.
        status, _, _ = _get(base, "/api/attachment?path=../../etc/passwd")
        assert status == 403

        status, _, body = _get(base, "/api/search?q=ACME")
        assert status == 200
        hits = json.loads(body)
        assert any(h["id"] == "acme-sl" for h in hits)

        status, _, _ = _get(base, "/api/node/slug-fantasma")
        assert status == 404

        # POST /api/refresh rebuilds the cache.
        req = urllib.request.Request(base + "/api/refresh", method="POST")
        with urllib.request.urlopen(req, timeout=10) as resp:
            refresh_status = resp.status
            refresh_body = resp.read()
        assert refresh_status == 200
        refreshed = json.loads(refresh_body)
        assert refreshed["status"] == "refreshed"
    finally:
        server.shutdown()
        server.server_close()
        # Wait for the serving thread to finish so it does not outlive the test.
        thread.join(timeout=5)
|