"""Backend tests for osint_web, run against an ephemeral fixture vault.

Covers the backend-relevant scenarios from the Definition of Done of issue
0172: golden graph, table filtered by type, node detail with attachments,
dangling wikilink, slug with accents, blocked path traversal, and a
nonexistent vault reported with a clear error. Also includes an e2e test that
starts the server on an ephemeral port and hits the real endpoints over HTTP.
"""

import importlib.util
import json
import os
import subprocess
import sys
import threading
import urllib.error
import urllib.request

import pytest

# Load the sibling main.py by file path (under the module name
# "osint_web_main") instead of relying on it being importable as a package.
HERE = os.path.dirname(os.path.abspath(__file__))
_spec = importlib.util.spec_from_file_location(
    "osint_web_main", os.path.join(HERE, "main.py")
)
main = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(main)


# --- fixture: minimal vault with people, organizations and attachments -------


@pytest.fixture()
def vault(tmp_path):
    """Build an ephemeral Obsidian vault and return its root path as a string.

    Layout created under ``tmp_path / "vault_osint"``:

    * ``.obsidian/app.json`` -- empty JSON object.
    * ``personas/maria-del-mar-perez.md`` -- person note linking to
      ``[[ACME SL]]`` and to the nonexistent ``[[Persona-Inexistente]]``,
      embedding one existing image and one missing PDF.
    * ``organizaciones/acme-sl.md`` -- organization note linking back to the
      person by its accented display name.
    * ``attachments/personas/maria-del-mar-perez/dni-maria.jpg`` -- fake JPEG
      bytes (JPEG magic number followed by filler).
    """
    root = tmp_path / "vault_osint"
    (root / ".obsidian").mkdir(parents=True)
    (root / ".obsidian" / "app.json").write_text("{}", encoding="utf-8")

    persona_dir = root / "personas"
    persona_dir.mkdir()
    (persona_dir / "maria-del-mar-perez.md").write_text(
        "---\n"
        "tipo: persona\n"
        "nombre: María del Mar Pérez\n"
        "dni: 12345678Z\n"
        "fecha_nacimiento: 1980-05-01\n"
        "tags: [objetivo]\n"
        "---\n"
        "\n"
        "Ficha de prueba.\n"
        "\n"
        "## Relaciones\n"
        "\n"
        "- [[ACME SL]]\n"
        "- [[Persona-Inexistente]]\n"
        "\n"
        "## Documentos\n"
        "\n"
        "![[dni-maria.jpg]]\n"
        "![[certificado-perdido.pdf]]\n",
        encoding="utf-8",
    )

    org_dir = root / "organizaciones"
    org_dir.mkdir()
    (org_dir / "acme-sl.md").write_text(
        "---\n"
        "tipo: organizacion\n"
        "nombre: ACME SL\n"
        "cif: B00000000\n"
        "---\n"
        "\n"
        "## Relaciones\n"
        "\n"
        "- [[María del Mar Pérez]]\n",
        encoding="utf-8",
    )

    attach_dir = root / "attachments" / "personas" / "maria-del-mar-perez"
    attach_dir.mkdir(parents=True)
    (attach_dir / "dni-maria.jpg").write_bytes(b"\xff\xd8\xff" + b"fakejpegdata")

    return str(root)


# --- VaultState: graph, tables, node details ----------------------------------


def test_graph_golden(vault):
    """Both real notes are graph nodes and the person -> org edge exists."""
    state = main.VaultState(vault)
    ids = {n["id"] for n in state.graph["nodes"]}
    assert {"maria-del-mar-perez", "acme-sl"} <= ids
    # Edge from the "## Relaciones" section, with the right kind and a
    # resolved target.
    assert {
        "source": "maria-del-mar-perez",
        "target": "acme-sl",
        "kind": "relacion",
    } in state.graph["edges"]


def test_wikilink_acentos_resuelve_por_slug(vault):
    """[[María del Mar Pérez]] (accents, capitals) -> maria-del-mar-perez.md."""
    state = main.VaultState(vault)
    assert {
        "source": "acme-sl",
        "target": "maria-del-mar-perez",
        "kind": "relacion",
    } in state.graph["edges"]


def test_wikilink_dangling_genera_nodo_fantasma(vault):
    """A wikilink with no backing note yields a node flagged ``dangling``."""
    state = main.VaultState(vault)
    ghosts = [n for n in state.graph["nodes"] if n.get("dangling")]
    assert any(n["id"] == "persona-inexistente" for n in ghosts)
    # And it does not show up in the tables (real nodes only).
    assert all(r["id"] != "persona-inexistente" for r in state.rows_by_tipo(""))


def test_rows_filtradas_por_tipo(vault):
    """Filtering rows by ``organizacion`` returns only the ACME note."""
    state = main.VaultState(vault)
    rows = state.rows_by_tipo("organizacion")
    assert [r["id"] for r in rows] == ["acme-sl"]
    assert rows[0]["frontmatter"]["cif"] == "B00000000"


def test_node_detail_con_attachments(vault):
    """Node detail exposes frontmatter, body and per-embed attachment info."""
    state = main.VaultState(vault)
    detail = state.node_detail("maria-del-mar-perez")
    assert detail is not None
    assert detail["frontmatter"]["dni"] == "12345678Z"
    assert "Ficha de prueba" in detail["body"]

    by_name = {a["name"]: a for a in detail["attachments"]}
    dni = by_name["dni-maria.jpg"]
    assert dni["kind"] == "image"
    assert dni["path"] == os.path.join(
        "attachments", "personas", "maria-del-mar-perez", "dni-maria.jpg"
    )
    # An embed that does not resolve to a file is marked missing, no crash.
    assert by_name["certificado-perdido.pdf"]["kind"] == "missing"


def test_node_detail_desconocido(vault):
    """Asking for an unknown slug returns ``None``."""
    state = main.VaultState(vault)
    assert state.node_detail("no-existe-este-slug") is None


# --- security: path traversal + nonexistent vault -----------------------------


def test_attachment_path_traversal_bloqueado(vault):
    """Traversal, absolute, empty and "." paths are rejected with ``None``."""
    state = main.VaultState(vault)
    assert state.resolve_attachment_path("../../etc/passwd") is None
    assert state.resolve_attachment_path("/etc/passwd") is None
    assert state.resolve_attachment_path("") is None
    assert state.resolve_attachment_path(".") is None
    # A legitimate path inside the vault does resolve.
    ok = state.resolve_attachment_path(
        "attachments/personas/maria-del-mar-perez/dni-maria.jpg"
    )
    assert ok is not None and ok.endswith("dni-maria.jpg")


def test_vault_inexistente_error_claro():
    """A missing vault raises ``FileNotFoundError`` with the expected message."""
    with pytest.raises(FileNotFoundError, match="el vault no existe"):
        main.VaultState("/no/existe/este/vault")


def test_cli_vault_inexistente_exit_2():
    """Running main.py as a script on a missing vault exits 2 and explains why."""
    proc = subprocess.run(
        [sys.executable, os.path.join(HERE, "main.py"), "--vault", "/no/existe"],
        capture_output=True,
        text=True,
        timeout=30,
    )
    assert proc.returncode == 2
    assert "el vault no existe" in proc.stderr


# --- e2e HTTP: real server on an ephemeral port -------------------------------


def _get(base, path):
    """GET ``base + path`` and return ``(status, content_type, body_bytes)``.

    HTTP error statuses are returned like any other response instead of
    raising, so tests can assert on 403/404 directly. Other failures (for
    example a refused connection) still propagate.
    """
    try:
        with urllib.request.urlopen(base + path, timeout=10) as resp:
            return resp.status, resp.headers.get("Content-Type", ""), resp.read()
    except urllib.error.HTTPError as err:
        # HTTPError doubles as the file-like response object; close it once
        # the body has been read so the underlying connection is not leaked.
        try:
            return err.code, err.headers.get("Content-Type", ""), err.read()
        finally:
            err.close()


def test_http_endpoints(vault):
    """Exercise the HTTP API end to end against a server on a free port."""
    server = main.make_server(vault, 0, quiet=True)
    port = server.server_address[1]
    base = f"http://127.0.0.1:{port}"
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    try:
        status, _, body = _get(base, "/api/health")
        assert status == 200
        health = json.loads(body)
        assert health["status"] == "ok" and health["nodes"] >= 2

        status, _, body = _get(base, "/api/graph")
        graph = json.loads(body)
        assert status == 200 and len(graph["edges"]) >= 2

        status, _, body = _get(base, "/api/nodes?tipo=persona")
        rows = json.loads(body)
        assert status == 200 and [r["id"] for r in rows] == ["maria-del-mar-perez"]

        status, _, body = _get(base, "/api/node/maria-del-mar-perez")
        detail = json.loads(body)
        assert status == 200 and detail["label"] == "María del Mar Pérez"
        # PyYAML parses the date as datetime.date -> it must serialize to ISO.
        assert detail["frontmatter"]["fecha_nacimiento"] == "1980-05-01"

        status, ctype, body = _get(
            base,
            "/api/attachment?path=attachments/personas/maria-del-mar-perez/dni-maria.jpg",
        )
        assert status == 200 and ctype.startswith("image/") and body[:3] == b"\xff\xd8\xff"

        # DoD error path: traversal must never serve anything outside the vault.
        status, _, _ = _get(base, "/api/attachment?path=../../etc/passwd")
        assert status == 403

        status, _, body = _get(base, "/api/search?q=ACME")
        hits = json.loads(body)
        assert status == 200 and any(h["id"] == "acme-sl" for h in hits)

        status, _, _ = _get(base, "/api/node/slug-fantasma")
        assert status == 404

        # POST /api/refresh rebuilds the cache.
        req = urllib.request.Request(base + "/api/refresh", method="POST")
        with urllib.request.urlopen(req, timeout=10) as resp:
            refreshed = json.loads(resp.read())
        assert resp.status == 200 and refreshed["status"] == "refreshed"
    finally:
        server.shutdown()
        server.server_close()
        # shutdown() stops the serve_forever loop; a bounded join makes sure
        # the server thread has finished before the test returns.
        thread.join(timeout=5)