"""Tests del service osint_db: migraciones, ingest del vault, API y render. Todo corre contra un vault temporal y una base DuckDB temporal, SIN red: el ingest DAV no se ejercita aquí (requiere Xandikos + pass). El enlace contacto→ficha sí se prueba insertando un contacto a mano y relanzando el ingest del vault, que re-enlaza. """ from __future__ import annotations import os import sys from datetime import datetime, timezone import pytest from fastapi.testclient import TestClient sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from server.config import Config # noqa: E402 from server.db import apply_migrations, write_conn # noqa: E402 from server.main import create_app # noqa: E402 PERSONA_MD = """--- tipo: persona nombre: "Ana García Pérez" slug: ana-garcia-perez aliases: ["Anita"] sexo: mujer fecha_nacimiento: 1990-04-12 dni: 12345678Z telefono: "+34 600 111 222" email: ana@example.com direccion: null pais: españa relaciones: [] contexto: familia fuente: "test fixture" tags: [persona, osint] --- ## Notas Ficha de prueba. """ PERSONA2_MD = """--- tipo: persona nombre: "Luis Pérez" slug: luis-perez aliases: [] sexo: hombre fecha_nacimiento: null dni: null telefono: null email: null direccion: null pais: null relaciones: [] contexto: movil fuente: "Xandikos UID abc-123" tags: [persona, osint, movil] --- ## Notas """ ORG_MD = """--- tipo: organizacion nombre: "Acme S.L." slug: acme-sl tags: [organizacion, osint] --- ## Notas """ DOC_MD = """--- tipo: documento doc_tipo: dni --- Sub-nota de documento (NO debe contar como ficha de persona). """ @pytest.fixture() def cfg(tmp_path): """Vault temporal con fichas de fixture + base DuckDB temporal migrada.""" vault = tmp_path / "vault" (vault / "personas" / "ana-garcia-perez").mkdir(parents=True) (vault / "organizaciones").mkdir() (vault / "personas" / "ana-garcia-perez.md").write_text( PERSONA_MD, encoding="utf-8" ) (vault / "personas" / "luis-perez.md").write_text(PERSONA2_MD, encoding="utf-8") (vault / "personas" / "_plantilla.md").write_text( "---\ntipo: plantilla\n---\n", encoding="utf-8" ) (vault / "personas" / "ana-garcia-perez" / "dni.md").write_text( DOC_MD, encoding="utf-8" ) (vault / "organizaciones" / "acme-sl.md").write_text(ORG_MD, encoding="utf-8") config = Config( vault_dir=str(vault), db_path=str(tmp_path / "data" / "osint.duckdb"), port=0, ) apply_migrations(config.db_path) return config @pytest.fixture() def client(cfg): return TestClient(create_app(cfg)) def test_migrations_son_idempotentes(cfg): """La segunda pasada de migraciones no aplica nada (tabla _migrations).""" assert apply_migrations(cfg.db_path) == [] def test_health(client, cfg): r = client.get("/api/health").json() assert r["status"] == "ok" assert r["db_path"] == cfg.db_path assert r["tables"] >= 8 def test_ingest_vault_cuenta_entidades(client): r = client.post("/api/ingest/vault").json() assert r["status"] == "ok" # 5 notas: 2 personas + plantilla + sub-nota documento + organización. assert r["notes"] == 5 # Solo las fichas de nivel-1 sin prefijo _ cuentan como persona. assert r["persons"] == 2 assert r["organizations"] == 1 assert r["domains"] == 0 assert sorted(r["derived_rebuilt"]) == [ "derived.contact_link_quality", "derived.event_monthly", "derived.person_stats", ] def test_ingest_vault_extrae_dav_uid_de_fuente(client): client.post("/api/ingest/vault") r = client.post( "/api/query", json={"sql": "SELECT dav_uid FROM persons WHERE slug = 'luis-perez'"}, ).json() assert r["status"] == "ok" assert r["rows"][0]["dav_uid"] == "abc-123" def test_api_query_ok_y_error_siempre_http_200(client): client.post("/api/ingest/vault") ok = client.post( "/api/query", json={"sql": "SELECT slug, nombre FROM persons ORDER BY slug", "max_rows": 10}, ) assert ok.status_code == 200 body = ok.json() assert body["status"] == "ok" assert body["columns"] == ["slug", "nombre"] assert body["row_count"] == 2 assert body["truncated"] is False assert body["rows"][0]["slug"] == "ana-garcia-perez" err = client.post("/api/query", json={"sql": "SELECT * FROM tabla_que_no_existe"}) assert err.status_code == 200 assert err.json()["status"] == "error" assert err.json()["error"] def test_api_query_es_solo_lectura(client): client.post("/api/ingest/vault") r = client.post( "/api/query", json={"sql": "DELETE FROM persons"} ).json() assert r["status"] == "error" def test_catalogo_de_queries_con_nombre(client): r = client.get("/api/queries").json() assert r["status"] == "ok" names = {q["name"] for q in r["queries"]} assert { "personas_por_contexto", "personas_recientes", "eventos_proximos", "contactos_sin_nota", "stats_personas", } <= names assert all(q["sql"] and q["description"] for q in r["queries"]) def test_query_named_ok_y_desconocida(client): client.post("/api/ingest/vault") r = client.post( "/api/query/named", json={"name": "personas_por_contexto"} ).json() assert r["status"] == "ok" contextos = {row["contexto"]: row["personas"] for row in r["rows"]} assert contextos == {"familia": 1, "movil": 1} bad = client.post("/api/query/named", json={"name": "no_existe"}).json() assert bad["status"] == "error" def test_tables_inventario(client): client.post("/api/ingest/vault") r = client.get("/api/tables").json() assert r["status"] == "ok" by_name = {(t["schema"], t["name"]): t for t in r["tables"]} persons = by_name[("main", "persons")] assert persons["kind"] == "master" assert persons["row_count"] == 2 assert {"name": "note_path", "type": "VARCHAR"} in persons["columns"] stats = by_name[("derived", "person_stats")] assert stats["kind"] == "derived" assert ("main", "_migrations") not in by_name def test_derivadas_sin_note_path(client): """Regla dura: ninguna tabla del schema derived referencia notas.""" client.post("/api/ingest/vault") r = client.post( "/api/query", json={ "sql": ( "SELECT table_name, column_name FROM information_schema.columns " "WHERE table_schema = 'derived' AND column_name LIKE '%note%'" ) }, ).json() assert r["status"] == "ok" assert r["rows"] == [] # Y las tres derivadas existen de verdad. t = client.post( "/api/query", json={ "sql": ( "SELECT table_name FROM information_schema.tables " "WHERE table_schema = 'derived' ORDER BY table_name" ) }, ).json() assert [row["table_name"] for row in t["rows"]] == [ "contact_link_quality", "event_monthly", "person_stats", ] def test_link_contacts_por_telefono(client, cfg): """Un contacto con teléfono que casa con una ficha queda enlazado al re-ingestar.""" client.post("/api/ingest/vault") now = datetime.now(tz=timezone.utc) with write_conn(cfg.db_path) as conn: conn.execute( "INSERT INTO contacts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", [ "uid-movil-1", "/enmanuel/contacts/addressbook/", "etag1", "Ana G.", '["600111222"]', "[]", "BEGIN:VCARD...", None, now, ], ) conn.execute( "INSERT INTO contacts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", [ "uid-movil-2", "/enmanuel/contacts/addressbook/", "etag2", "Desconocido", '["699999999"]', "[]", "BEGIN:VCARD...", None, now, ], ) # El ingest del vault re-enlaza contacts y reconstruye derivadas. client.post("/api/ingest/vault") r = client.post( "/api/query", json={"sql": "SELECT uid, note_path FROM contacts ORDER BY uid"}, ).json() rows = {row["uid"]: row["note_path"] for row in r["rows"]} assert rows["uid-movil-1"] == os.path.join("personas", "ana-garcia-perez.md") assert rows["uid-movil-2"] is None q = client.post("/api/query/named", json={"name": "contactos_sin_nota"}).json() assert [row["uid"] for row in q["rows"]] == ["uid-movil-2"] quality = client.post( "/api/query/named", json={"name": "calidad_enlace_contactos"} ).json() assert quality["rows"] == [{"total": 2, "linked": 1, "unlinked": 1}] def test_render_note_crea_bloque_sentinel_y_es_idempotente(client, cfg): client.post("/api/ingest/vault") body = { "note_path": "tableros/personas.md", "block_id": "personas", "query": "personas_por_contexto", "title": "Personas por contexto", } r = client.post("/api/render/note", json=body).json() assert r["status"] == "ok" assert r["note_path"] == "tableros/personas.md" assert r["rows_rendered"] == 2 note_file = os.path.join(cfg.vault_dir, "tableros", "personas.md") content = open(note_file, encoding="utf-8").read() assert "" in content assert "" in content assert "### Personas por contexto" in content assert "| contexto | personas |" in content assert "| familia | 1 |" in content # Idempotente: un segundo render no duplica el bloque ni la tabla. r2 = client.post("/api/render/note", json=body).json() assert r2["status"] == "ok" content2 = open(note_file, encoding="utf-8").read() assert content2.count("") == 1 assert content2.count("| familia | 1 |") == 1 def test_render_note_valida_inputs(client): client.post("/api/ingest/vault") # Ni sql ni query. r = client.post( "/api/render/note", json={"note_path": "t.md", "block_id": "x"} ).json() assert r["status"] == "error" # Query con nombre desconocida. r = client.post( "/api/render/note", json={"note_path": "t.md", "block_id": "x", "query": "nope"}, ).json() assert r["status"] == "error" # Path traversal fuera del vault. r = client.post( "/api/render/note", json={"note_path": "../fuera.md", "block_id": "x", "query": "stats_personas"}, ).json() assert r["status"] == "error" assert "fuera del vault" in r["error"]