"""Tests for the osint_web backend.

They cover the DoD contract of issue 0172 without depending on the network
(Xandikos):
  - Path traversal on /api/attachment (mandatory security check).
  - Missing vault -> clear error at startup, not a 500.
  - Graph / tables filtered by type / node card with attachments, over a
    minimal synthetic vault built in a tmpdir.
  - DAV endpoints: clear degradation (no crash) when Xandikos does not
    respond, and vCard/iCalendar parsing to JSON without network.

Starlette/FastAPI's ``TestClient`` is used over a temporary vault, so the
tests are hermetic and deterministic (they never touch the real vault, which
holds PII).
"""

import os
import sys

import pytest
from fastapi.testclient import TestClient

# The backend orchestrates registry functions: they must be importable.
_HERE = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.join(_HERE, "..", "server"))

import main as srv  # noqa: E402


# ---------------------------------------------------------------------------
# Fixtures: minimal synthetic vault
# ---------------------------------------------------------------------------


def _write(path: str, content: str) -> None:
    """Write ``content`` to ``path`` as UTF-8, creating parent directories first."""
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)


@pytest.fixture()
def vault(tmp_path):
    """Build a minimal Obsidian vault: one person, one org and one attachment.

    A ``secret.txt`` file is also written next to (outside) the vault root so
    the path-traversal test has something to try to reach.

    Returns:
        The vault root directory as a ``str``.
    """
    root = tmp_path / "osint"
    # A person with an embedded photo (by path) and wikilinks to an org and to
    # a person that does not exist.
    _write(
        str(root / "personas" / "ana-gomez.md"),
        "---\n"
        "tipo: persona\n"
        "nombre: Ana Gómez\n"
        "dni: 12345678A\n"
        "tags: [objetivo]\n"
        "---\n\n"
        "## Relaciones\n"
        "- [[acme-sl]]\n"
        "- [[Persona-Inexistente]]\n\n"
        "## Documentos\n"
        "![[attachments/personas/ana-gomez/ana-foto.jpg]]\n",
    )
    _write(
        str(root / "organizaciones" / "acme-sl.md"),
        "---\ntipo: organizacion\nnombre: Acme SL\ncif: B12345678\n---\n\nOrg de prueba.\n",
    )
    # The embedded attachment (any file content will do).
    _write(str(root / "attachments" / "personas" / "ana-gomez" / "ana-foto.jpg"), "FAKEJPEGDATA")
    # A secret file OUTSIDE the vault, for the path-traversal test.
    _write(str(tmp_path / "secret.txt"), "TOP SECRET")
    return str(root)


@pytest.fixture()
def client(vault):
    """Return a ``TestClient`` bound to an app created over the synthetic vault."""
    app = srv.create_app(vault)
    return TestClient(app)


# ---------------------------------------------------------------------------
# Golden: the graph loads the vault
# ---------------------------------------------------------------------------


def test_graph_loads_vault(client):
    """/api/graph lists both real notes, at least one edge and per-type counts."""
    resp = client.get("/api/graph")
    assert resp.status_code == 200
    data = resp.json()
    # 2 real notes (ana, acme) + 1 phantom node (Persona-Inexistente).
    ids = {n["id"] for n in data["nodes"]}
    assert "ana-gomez" in ids
    assert "acme-sl" in ids
    assert data["total_edges"] >= 1
    # Per-type counts are present for the legend.
    assert data["counts"].get("persona") == 1
    assert data["counts"].get("organizacion") == 1


def test_node_card_with_attachments(client):
    """/api/node/<id> returns frontmatter, body and the embedded attachment."""
    resp = client.get("/api/node/ana-gomez")
    assert resp.status_code == 200
    data = resp.json()
    assert data["frontmatter"]["nombre"] == "Ana Gómez"
    assert data["body"].strip() != ""
    # The attachment gallery resolves the embedded photo (by path).
    # A default is passed to next() so a missing photo fails with a readable
    # assertion instead of a bare StopIteration.
    foto = next(
        (a for a in data["attachments"] if a["name"].endswith("ana-foto.jpg")),
        None,
    )
    assert foto is not None, "ana-foto.jpg missing from the node's attachments"
    assert foto["kind"] == "image"
    assert foto["path"]  # vault-relative path, not empty


# ---------------------------------------------------------------------------
# Edge: table filtered by type
# ---------------------------------------------------------------------------


def test_nodes_filtered_by_tipo(client):
    """/api/nodes?tipo=organizacion returns only the single organisation row."""
    resp = client.get("/api/nodes", params={"tipo": "organizacion"})
    assert resp.status_code == 200
    data = resp.json()
    assert data["count"] == 1
    assert all(r["tipo"] == "organizacion" for r in data["rows"])
    assert data["rows"][0]["id"] == "acme-sl"


# ---------------------------------------------------------------------------
# Edge: dangling wikilink -> phantom node, no crash
# ---------------------------------------------------------------------------


def test_dangling_wikilink_is_phantom(client):
    """A wikilink to a non-existent note shows up as a node flagged ``dangling``."""
    data = client.get("/api/graph").json()
    phantom_ids = {n["id"] for n in data["nodes"] if n.get("dangling")}
    assert "persona-inexistente" in phantom_ids


# ---------------------------------------------------------------------------
# Edge: name with uppercase/accents -> stable slug
# ---------------------------------------------------------------------------


def test_slugify_accents():
    """Accents are stripped, case is lowered and spaces become hyphens."""
    assert srv.slugify_obsidian_name("María del Mar") == "maria-del-mar"


# ---------------------------------------------------------------------------
# Error: path traversal on attachment (mandatory SECURITY check)
# ---------------------------------------------------------------------------


def test_attachment_path_traversal_blocked(client):
    """``..`` paths are rejected (403/404) and leak no content from outside the vault."""
    resp = client.get("/api/attachment", params={"path": "../../etc/passwd"})
    assert resp.status_code in (403, 404)
    assert "root:" not in resp.text
    resp2 = client.get("/api/attachment", params={"path": "../secret.txt"})
    assert resp2.status_code in (403, 404)
    assert "TOP SECRET" not in resp2.text


def test_attachment_legit_served(client):
    """A path inside the vault is served with the file's exact bytes."""
    # The vault-relative path travels as a query parameter, so it is spelled
    # with "/" (like the sibling 404 test) instead of os.path.join, which
    # would produce backslashes on Windows.
    rel = "attachments/personas/ana-gomez/ana-foto.jpg"
    resp = client.get("/api/attachment", params={"path": rel})
    assert resp.status_code == 200
    assert resp.content == b"FAKEJPEGDATA"


def test_attachment_nonexistent_inside_vault_404(client):
    """A well-formed path inside the vault that does not exist yields 404."""
    resp = client.get("/api/attachment", params={"path": "attachments/no-existe.png"})
    assert resp.status_code == 404


# ---------------------------------------------------------------------------
# Error: missing vault -> clear error at startup, not a 500
# ---------------------------------------------------------------------------


def test_vault_inexistent_raises_clear_error():
    """``create_app`` raises ``FileNotFoundError`` for a vault path that does not exist."""
    with pytest.raises(FileNotFoundError):
        srv.create_app("/no/existe/vault/osint")


# ---------------------------------------------------------------------------
# Search
# ---------------------------------------------------------------------------


def test_search_finds_node(client):
    """/api/search?q=Ana includes the ``ana-gomez`` node in its results."""
    resp = client.get("/api/search", params={"q": "Ana"})
    assert resp.status_code == 200
    ids = {r["id"] for r in resp.json()["results"]}
    assert "ana-gomez" in ids


# ---------------------------------------------------------------------------
# DAV: parsing to JSON (no network) + clear degradation
# ---------------------------------------------------------------------------


def test_vcard_to_json():
    """A vCard 3.0 is parsed into uid/fn/nickname/org/phones/emails."""
    # NOTE(review): this test reads the fn/nickname/org/phones/emails keys,
    # while test_vcard_to_json_alias_nota_osint_y_item_prefix reads
    # nombre/alias/telefonos/correos from the same function. Presumably the
    # parser emits both key sets -- confirm against srv._vcard_to_json.
    vcard = (
        "BEGIN:VCARD\r\n"
        "VERSION:3.0\r\n"
        "UID:abc-123\r\n"
        "FN:Juan Pérez\r\n"
        "NICKNAME:Juanito\r\n"
        "ORG:Acme;Ventas\r\n"
        "TEL;TYPE=CELL:+34600111222\r\n"
        "EMAIL;TYPE=HOME:juan@example.com\r\n"
        "NOTE:Contacto de prueba\r\n"
        "END:VCARD\r\n"
    )
    out = srv._vcard_to_json(vcard)
    assert out["uid"] == "abc-123"
    assert out["fn"] == "Juan Pérez"
    assert out["nickname"] == "Juanito"
    assert out["org"] == "Acme Ventas"
    assert out["phones"][0]["value"] == "+34600111222"
    assert out["emails"][0]["value"] == "juan@example.com"


def test_vevent_to_json_and_range():
    """A VEVENT is parsed into one event and range filtering honours its DTSTART."""
    vcal = (
        "BEGIN:VCALENDAR\r\n"
        "BEGIN:VEVENT\r\n"
        "UID:evt-1\r\n"
        "SUMMARY:Reunión OSINT\r\n"
        "DTSTART:20260615T090000Z\r\n"
        "DTEND:20260615T100000Z\r\n"
        "LOCATION:Oficina\r\n"
        "END:VEVENT\r\n"
        "END:VCALENDAR\r\n"
    )
    events = srv._vcalendar_to_events(vcal)
    assert len(events) == 1
    evt = events[0]
    assert evt["summary"] == "Reunión OSINT"
    assert evt["dtstart"].startswith("20260615")
    assert srv._event_in_range(evt, "20260601", "20260630") is True
    assert srv._event_in_range(evt, "20260101", "20260131") is False


def test_dav_endpoints_degrade_without_network(client, monkeypatch):
    """Without a reachable Xandikos the DAV endpoints return a clear 503, no crash.

    The vault endpoints keep working offline (they are not affected).
    """
    monkeypatch.setattr(
        srv, "dav_list_resources", lambda *a, **k: {"status": "error", "error": "sin red"}
    )
    # Avoid reading from pass during the test (cache a dummy password).
    client.app.state.vault._xandikos_password = "x"
    r1 = client.get("/api/contacts")
    assert r1.status_code == 503
    assert r1.json()["status"] == "error"
    r2 = client.get("/api/calendar")
    assert r2.status_code == 503
    assert r2.json()["status"] == "error"
    # The DAV failure does NOT contaminate the vault endpoints (offline-OK).
    assert client.get("/api/graph").status_code == 200
    assert client.get("/api/health").status_code == 200


# ---------------------------------------------------------------------------
# DAV: osint / alias / nota / itemN. fields
# + cache + invalidation
# ---------------------------------------------------------------------------


def test_vcard_to_json_alias_nota_osint_y_item_prefix():
    """Parse alias (NICKNAME), note (NOTE), osint (X-OSINT-*) and the itemN. prefix."""
    vcard = (
        "BEGIN:VCARD\r\n"
        "VERSION:3.0\r\n"
        "UID:maria-001\r\n"
        "FN:María del Mar Pérez\r\n"
        "NICKNAME:Marimar\r\n"
        "item1.TEL;TYPE=CELL:+34 600 111 222\r\n"
        "item2.EMAIL;TYPE=INTERNET:maria@example.com\r\n"
        "NOTE:Objetivo principal.\r\n"
        "X-OSINT-DNI:12345678Z\r\n"
        "X-OSINT-PAIS:España\r\n"
        "X-OSINT-SEXO:F\r\n"
        "END:VCARD\r\n"
    )
    out = srv._vcard_to_json(vcard)
    assert out["uid"] == "maria-001"
    assert out["nombre"] == "María del Mar Pérez"
    assert out["alias"] == "Marimar"
    assert out["nota"] == "Objetivo principal."
    # The itemN. prefix is stripped: TEL/EMAIL are still recognised.
    assert out["telefonos"] == ["+34 600 111 222"]
    assert out["correos"] == ["maria@example.com"]
    # osint block derived from the X-OSINT-* properties.
    assert out["osint"] == {"dni": "12345678Z", "pais": "España", "sexo": "F"}


def test_vcard_to_json_nombre_desde_N_sin_fn():
    """Without FN the name comes from N, and ``osint`` is empty without X-OSINT-*."""
    vcard = (
        "BEGIN:VCARD\r\nVERSION:3.0\r\nUID:juan-002\r\n"
        "N:García;Juan;;;\r\nTEL:+34 611 222 333\r\nEND:VCARD\r\n"
    )
    out = srv._vcard_to_json(vcard)
    assert out["nombre"] == "Juan García"
    assert out["osint"] == {}


# Mocked DAV fixture data: two contacts (one with osint fields) and two events.
_VCF_BODY = (
    "BEGIN:VCARD\r\nVERSION:3.0\r\nUID:maria-001\r\nFN:María Pérez\r\n"
    "NICKNAME:Mari\r\nX-OSINT-DNI:12345678Z\r\nX-OSINT-PAIS:España\r\n"
    "item1.TEL;TYPE=CELL:+34600111222\r\nEND:VCARD\r\n"
)
_VCF_BODY_2 = (
    "BEGIN:VCARD\r\nVERSION:3.0\r\nUID:juan-002\r\nFN:Juan García\r\n"
    "EMAIL:juan@example.com\r\nEND:VCARD\r\n"
)
_ICS_BODY = (
    "BEGIN:VCALENDAR\r\nVERSION:2.0\r\nBEGIN:VEVENT\r\nUID:evt-001\r\n"
    "SUMMARY:Reunión\r\nDTSTART:20260611T090000Z\r\nDTEND:20260611T100000Z\r\n"
    "LOCATION:Madrid\r\nEND:VEVENT\r\nEND:VCALENDAR\r\n"
)
_ICS_BODY_2 = (
    "BEGIN:VCALENDAR\r\nVERSION:2.0\r\nBEGIN:VEVENT\r\nUID:evt-002\r\n"
    "SUMMARY:Vigilancia\r\nDTSTART:20260620T200000Z\r\nEND:VEVENT\r\n"
    "END:VCALENDAR\r\n"
)


@pytest.fixture()
def fake_dav(monkeypatch):
    """Patch the registry DAV functions with in-memory fixtures (no network).

    Returns a dict ``{"calls": int}`` that counts the collection listings
    (PROPFIND) so tests can verify caching: a second read must not call
    Xandikos again.
    """
    state = {"calls": 0}
    contacts_res = [
        {"href": "/enmanuel/contacts/addressbook/maria-001.vcf", "etag": '"a"'},
        {"href": "/enmanuel/contacts/addressbook/juan-002.vcf", "etag": '"b"'},
    ]
    calendar_res = [
        {"href": "/enmanuel/calendars/calendar/evt-001.ics", "etag": '"c"'},
        {"href": "/enmanuel/calendars/calendar/evt-002.ics", "etag": '"d"'},
    ]
    bodies = {
        "/enmanuel/contacts/addressbook/maria-001.vcf": _VCF_BODY,
        "/enmanuel/contacts/addressbook/juan-002.vcf": _VCF_BODY_2,
        "/enmanuel/calendars/calendar/evt-001.ics": _ICS_BODY,
        "/enmanuel/calendars/calendar/evt-002.ics": _ICS_BODY_2,
    }

    def _list(base, user, pw, collection, **kw):
        # Every listing bumps the counter; the collection string decides
        # which resource list is returned.
        state["calls"] += 1
        res = contacts_res if "contacts" in collection else calendar_res
        return {"status": "ok", "http_status": 207, "resources": res}

    def _get(base, user, pw, href, **kw):
        # Unknown hrefs yield an empty body rather than an error.
        return {"status": "ok", "http_status": 200, "text": bodies.get(href, "")}

    monkeypatch.setattr(srv, "dav_list_resources", _list)
    monkeypatch.setattr(srv, "dav_get_resource", _get)
    monkeypatch.setattr(srv, "pass_get_secret", lambda *a, **k: {"status": "ok", "value": "x"})
    return state


def test_contacts_endpoint_parsea_y_cachea(client, fake_dav):
    """/api/contacts parses both vCards and serves the second request from cache."""
    r = client.get("/api/contacts")
    assert r.status_code == 200
    data = r.json()
    assert data["status"] == "ok" and data["count"] == 2
    by_uid = {c["uid"]: c for c in data["contacts"]}
    maria = by_uid["maria-001"]
    assert maria["nombre"] == "María Pérez"
    assert maria["alias"] == "Mari"
    assert maria["telefonos"] == ["+34600111222"]
    assert maria["osint"] == {"dni": "12345678Z", "pais": "España"}
    # The second call does NOT issue another PROPFIND (served from the
    # in-memory cache).
    calls_after_first = fake_dav["calls"]
    client.get("/api/contacts")
    assert fake_dav["calls"] == calls_after_first


def test_contact_by_uid_desde_cache(client, fake_dav):
    """/api/contact/<uid> returns the contact, and 404 for an unknown uid."""
    r = client.get("/api/contact/maria-001")
    assert r.status_code == 200
    assert r.json()["contact"]["nombre"] == "María Pérez"
    assert client.get("/api/contact/no-existe").status_code == 404


def test_calendar_endpoint_rango_y_cache(client, fake_dav):
    """/api/calendar returns every event without a range and filters by from/to."""
    # No range: both events.
    r = client.get("/api/calendar")
    assert r.status_code == 200 and r.json()["count"] == 2
    # With a range: only evt-001 (11 June).
    r2 = client.get("/api/calendar", params={"from": "2026-06-01", "to": "2026-06-15"})
    # Check the status first so a failing request is reported as such rather
    # than as a KeyError on the JSON payload.
    assert r2.status_code == 200
    assert [e["uid"] for e in r2.json()["events"]] == ["evt-001"]


def test_refresh_invalida_cache_dav(client, fake_dav):
    """POST /api/refresh drops the DAV cache, so the next read lists again."""
    client.get("/api/contacts")  # fills the cache
    calls_before = fake_dav["calls"]
    client.post("/api/refresh")  # invalidates
    client.get("/api/contacts")  # issues PROPFIND again
    assert fake_dav["calls"] > calls_before


# ---------------------------------------------------------------------------
# Optional real smoke test against Xandikos (gated, does not run in CI)
# ---------------------------------------------------------------------------


@pytest.mark.skipif(
    os.environ.get("OSINT_WEB_DAV_SMOKE") != "1",
    reason="smoke DAV real desactivado (export OSINT_WEB_DAV_SMOKE=1 para correrlo)",
)
def test_smoke_dav_real(vault):
    """Smoke test against the real Xandikos. Requires network + pass.

    Asserts at least one contact. For the calendar only a 200 response with
    status "ok" is checked; the number of events is not asserted.
    """
    app = srv.create_app(vault)
    real_client = TestClient(app)
    contacts_resp = real_client.get("/api/contacts")
    assert contacts_resp.status_code == 200
    contacts = contacts_resp.json()
    assert contacts["status"] == "ok" and contacts["count"] >= 1
    calendar_resp = real_client.get("/api/calendar")
    assert calendar_resp.status_code == 200 and calendar_resp.json()["status"] == "ok"