feat: initial scaffold of osint_web — backend Python sobre el grupo obsidian
Fase 5b del issue 0172. Backend stdlib http (solo 127.0.0.1) que orquesta las funciones del grupo obsidian del fn_registry para servir el vault OSINT: grafo agregado (/api/graph), tablas por tipo (/api/nodes), fichas con attachments (/api/node, /api/attachment con bloqueo de path traversal) y búsqueda (/api/search). Cache en memoria con POST /api/refresh. Tests pytest (10) sobre vault fixture: grafo golden, tipo filtrado, ficha con attachments, wikilink dangling, slug con acentos, traversal bloqueado, vault inexistente (exit 2) y e2e HTTP en puerto efímero. Frontend (React + Vite + Mantine + sigma.js) queda para la fase siguiente. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,241 @@
|
||||
"""Tests del backend osint_web sobre un vault fixture efímero.
|
||||
|
||||
Cubre los escenarios del Definition of Done del issue 0172 que aplican al
|
||||
backend: grafo golden, tabla filtrada por tipo, ficha con attachments,
|
||||
wikilink dangling, slug con acentos, path traversal bloqueado y vault
|
||||
inexistente con error claro. Incluye un test e2e que levanta el servidor en
|
||||
un puerto efímero y golpea los endpoints reales por HTTP.
|
||||
"""
|
||||
|
||||
import importlib.util
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
import pytest
|
||||
|
||||
# Directory that contains this test module; main.py is expected next to it.
HERE = os.path.dirname(os.path.abspath(__file__))

# Load the backend straight from its file path under the module name
# "osint_web_main", then execute it so its attributes become available
# through the ``main`` name used by every test below.
_spec = importlib.util.spec_from_file_location(
    "osint_web_main",
    os.path.join(HERE, "main.py"),
)
main = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(main)
|
||||
|
||||
|
||||
# --- fixture: vault mínimo con personas, organizaciones y attachments --------
|
||||
|
||||
|
||||
@pytest.fixture()
def vault(tmp_path):
    """Build a throwaway Obsidian vault and return its root path as ``str``.

    Layout created under ``tmp_path / "vault_osint"``:

    * ``.obsidian/app.json`` -- an empty JSON object.
    * ``personas/maria-del-mar-perez.md`` -- note linking to ``[[ACME SL]]``
      and to ``[[Persona-Inexistente]]`` (no such note is written), and
      embedding ``dni-maria.jpg`` plus ``certificado-perdido.pdf`` (the PDF
      is never written either).
    * ``organizaciones/acme-sl.md`` -- note linking back through the
      accented, capitalised ``[[María del Mar Pérez]]``.
    * ``attachments/personas/maria-del-mar-perez/dni-maria.jpg`` -- a few
      fake bytes that start with the JPEG magic number.
    """
    vault_root = tmp_path / "vault_osint"

    config_dir = vault_root / ".obsidian"
    config_dir.mkdir(parents=True)  # parents=True also creates the vault root
    (config_dir / "app.json").write_text("{}", encoding="utf-8")

    persona_note = (
        "---\n"
        "tipo: persona\n"
        "nombre: María del Mar Pérez\n"
        "dni: 12345678Z\n"
        "fecha_nacimiento: 1980-05-01\n"
        "tags: [objetivo]\n"
        "---\n"
        "\n"
        "Ficha de prueba.\n"
        "\n"
        "## Relaciones\n"
        "\n"
        "- [[ACME SL]]\n"
        "- [[Persona-Inexistente]]\n"
        "\n"
        "## Documentos\n"
        "\n"
        "![[dni-maria.jpg]]\n"
        "![[certificado-perdido.pdf]]\n"
    )
    personas = vault_root / "personas"
    personas.mkdir()
    (personas / "maria-del-mar-perez.md").write_text(
        persona_note,
        encoding="utf-8",
    )

    org_note = (
        "---\n"
        "tipo: organizacion\n"
        "nombre: ACME SL\n"
        "cif: B00000000\n"
        "---\n"
        "\n"
        "## Relaciones\n"
        "\n"
        "- [[María del Mar Pérez]]\n"
    )
    organizations = vault_root / "organizaciones"
    organizations.mkdir()
    (organizations / "acme-sl.md").write_text(org_note, encoding="utf-8")

    # Only the JPEG exists on disk; the embedded PDF is deliberately absent.
    jpeg_stub = b"\xff\xd8\xff" + b"fakejpegdata"
    persona_attachments = (
        vault_root / "attachments" / "personas" / "maria-del-mar-perez"
    )
    persona_attachments.mkdir(parents=True)
    (persona_attachments / "dni-maria.jpg").write_bytes(jpeg_stub)

    return str(vault_root)
|
||||
|
||||
|
||||
# --- VaultState: grafo, tablas, fichas ----------------------------------------
|
||||
|
||||
|
||||
def test_graph_golden(vault):
    """The graph contains both fixture notes and the persona -> org edge."""
    state = main.VaultState(vault)

    node_ids = {node["id"] for node in state.graph["nodes"]}
    assert node_ids >= {"maria-del-mar-perez", "acme-sl"}

    # The link sits under the "## Relaciones" heading, so the edge must have
    # the "relacion" kind and a target already resolved to the note's id.
    expected_edge = {
        "source": "maria-del-mar-perez",
        "target": "acme-sl",
        "kind": "relacion",
    }
    assert expected_edge in state.graph["edges"]
|
||||
|
||||
|
||||
def test_wikilink_acentos_resuelve_por_slug(vault):
    """[[María del Mar Pérez]] (accents, capitals) -> maria-del-mar-perez.md."""
    state = main.VaultState(vault)

    back_reference = {
        "source": "acme-sl",
        "target": "maria-del-mar-perez",
        "kind": "relacion",
    }
    assert back_reference in state.graph["edges"]
|
||||
|
||||
|
||||
def test_wikilink_dangling_genera_nodo_fantasma(vault):
    """A wikilink with no backing note becomes a node flagged ``dangling``."""
    state = main.VaultState(vault)

    dangling_nodes = [
        node for node in state.graph["nodes"] if node.get("dangling")
    ]
    assert any(node["id"] == "persona-inexistente" for node in dangling_nodes)

    # The ghost node must not show up in the tables (real nodes only).
    all_rows = state.rows_by_tipo("")
    assert not any(row["id"] == "persona-inexistente" for row in all_rows)
|
||||
|
||||
|
||||
def test_rows_filtradas_por_tipo(vault):
    """Filtering by ``organizacion`` yields only ACME with its frontmatter."""
    org_rows = main.VaultState(vault).rows_by_tipo("organizacion")

    assert [row["id"] for row in org_rows] == ["acme-sl"]

    acme = org_rows[0]
    assert acme["frontmatter"]["cif"] == "B00000000"
|
||||
|
||||
|
||||
def test_node_detail_con_attachments(vault):
    """The node detail exposes frontmatter, body and per-embed attachments."""
    state = main.VaultState(vault)
    detail = state.node_detail("maria-del-mar-perez")

    assert detail is not None
    assert detail["frontmatter"]["dni"] == "12345678Z"
    assert "Ficha de prueba" in detail["body"]

    # Index the attachments by file name for direct lookups.
    attachments = {}
    for attachment in detail["attachments"]:
        attachments[attachment["name"]] = attachment

    image = attachments["dni-maria.jpg"]
    assert image["kind"] == "image"
    expected_path = os.path.join(
        "attachments", "personas", "maria-del-mar-perez", "dni-maria.jpg"
    )
    assert image["path"] == expected_path

    # An embed that does not resolve to a file is marked missing, no crash.
    assert attachments["certificado-perdido.pdf"]["kind"] == "missing"
|
||||
|
||||
|
||||
def test_node_detail_desconocido(vault):
    """Asking for a slug that is not in the vault returns ``None``."""
    unknown = main.VaultState(vault).node_detail("no-existe-este-slug")
    assert unknown is None
|
||||
|
||||
|
||||
# --- seguridad: path traversal + vault inexistente ----------------------------
|
||||
|
||||
|
||||
def test_attachment_path_traversal_bloqueado(vault):
    """Paths escaping the vault (or empty ones) resolve to ``None``."""
    state = main.VaultState(vault)

    # Relative escape, absolute path, empty string and the vault root itself.
    for rejected in ("../../etc/passwd", "/etc/passwd", "", "."):
        assert state.resolve_attachment_path(rejected) is None

    # A legitimate path inside the vault does resolve.
    resolved = state.resolve_attachment_path(
        "attachments/personas/maria-del-mar-perez/dni-maria.jpg"
    )
    assert resolved is not None
    assert resolved.endswith("dni-maria.jpg")
|
||||
|
||||
|
||||
def test_vault_inexistente_error_claro():
    """Building the state for a missing vault raises a descriptive error."""
    missing_vault = "/no/existe/este/vault"
    with pytest.raises(FileNotFoundError, match="el vault no existe"):
        main.VaultState(missing_vault)
|
||||
|
||||
|
||||
def test_cli_vault_inexistente_exit_2():
    """Running main.py as a script on a missing vault exits with status 2."""
    command = [
        sys.executable,
        os.path.join(HERE, "main.py"),
        "--vault",
        "/no/existe",
    ]
    # Explicit pipes on both streams, decoded as text, capped at 30 seconds.
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        timeout=30,
    )

    assert completed.returncode == 2
    assert "el vault no existe" in completed.stderr
|
||||
|
||||
|
||||
# --- e2e HTTP: server real en puerto efímero ----------------------------------
|
||||
|
||||
|
||||
def _get(base, path):
|
||||
try:
|
||||
with urllib.request.urlopen(base + path, timeout=10) as resp:
|
||||
return resp.status, resp.headers.get("Content-Type", ""), resp.read()
|
||||
except urllib.error.HTTPError as err:
|
||||
return err.code, err.headers.get("Content-Type", ""), err.read()
|
||||
|
||||
|
||||
def test_http_endpoints(vault):
    """End-to-end pass over the HTTP API on an OS-assigned port.

    Starts the server returned by ``main.make_server`` in a daemon thread,
    exercises every endpoint through real HTTP requests and always shuts the
    server down afterwards.
    """
    server = main.make_server(vault, 0, quiet=True)
    port = server.server_address[1]  # port 0 was requested; read the real one
    base = f"http://127.0.0.1:{port}"
    worker = threading.Thread(target=server.serve_forever, daemon=True)
    worker.start()
    try:
        # Health check.
        status, _, body = _get(base, "/api/health")
        assert status == 200
        health = json.loads(body)
        assert health["status"] == "ok"
        assert health["nodes"] >= 2

        # Aggregated graph.
        status, _, body = _get(base, "/api/graph")
        graph = json.loads(body)
        assert status == 200
        assert len(graph["edges"]) >= 2

        # Table filtered by type.
        status, _, body = _get(base, "/api/nodes?tipo=persona")
        rows = json.loads(body)
        assert status == 200
        assert [row["id"] for row in rows] == ["maria-del-mar-perez"]

        # Single node detail.
        status, _, body = _get(base, "/api/node/maria-del-mar-perez")
        detail = json.loads(body)
        assert status == 200
        assert detail["label"] == "María del Mar Pérez"
        # PyYAML parses the date as datetime.date -> must serialise to ISO.
        assert detail["frontmatter"]["fecha_nacimiento"] == "1980-05-01"

        # Attachment bytes are served with an image content type.
        status, content_type, body = _get(
            base,
            "/api/attachment?path=attachments/personas/maria-del-mar-perez/dni-maria.jpg",
        )
        assert status == 200
        assert content_type.startswith("image/")
        assert body[:3] == b"\xff\xd8\xff"

        # DoD error path: traversal never serves anything outside the vault.
        status, _, _ = _get(base, "/api/attachment?path=../../etc/passwd")
        assert status == 403

        # Search.
        status, _, body = _get(base, "/api/search?q=ACME")
        hits = json.loads(body)
        assert status == 200
        assert any(hit["id"] == "acme-sl" for hit in hits)

        # Unknown slug.
        status, _, _ = _get(base, "/api/node/slug-fantasma")
        assert status == 404

        # POST /api/refresh rebuilds the cache.
        refresh_request = urllib.request.Request(
            base + "/api/refresh", method="POST"
        )
        with urllib.request.urlopen(refresh_request, timeout=10) as resp:
            refreshed = json.loads(resp.read())
            refresh_status = resp.status
        assert refresh_status == 200
        assert refreshed["status"] == "refreshed"
    finally:
        server.shutdown()
        server.server_close()
|
||||
Reference in New Issue
Block a user