feat: initial scaffold of osint_db (DuckDB source-of-truth service)
This commit is contained in:
@@ -0,0 +1,356 @@
|
||||
"""Tests del service osint_db: migraciones, ingest del vault, API y render.
|
||||
|
||||
Todo corre contra un vault temporal y una base DuckDB temporal, SIN red: el
|
||||
ingest DAV no se ejercita aquí (requiere Xandikos + pass). El enlace
|
||||
contacto→ficha sí se prueba insertando un contacto a mano y relanzando el
|
||||
ingest del vault, que re-enlaza.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from server.config import Config # noqa: E402
|
||||
from server.db import apply_migrations, write_conn # noqa: E402
|
||||
from server.main import create_app # noqa: E402
|
||||
|
||||
# Person note with fully populated front matter. Its phone number is the one
# the contact-linking test matches against.
PERSONA_MD = """---
tipo: persona
nombre: "Ana García Pérez"
slug: ana-garcia-perez
aliases: ["Anita"]
sexo: mujer
fecha_nacimiento: 1990-04-12
dni: 12345678Z
telefono: "+34 600 111 222"
email: ana@example.com
direccion: null
pais: españa
relaciones: []
contexto: familia
fuente: "test fixture"
tags: [persona, osint]
---

## Notas
Ficha de prueba.
"""

# Person note with mostly-null fields; its `fuente` embeds a Xandikos UID
# ("abc-123") that the DAV-UID extraction test looks for.
PERSONA2_MD = """---
tipo: persona
nombre: "Luis Pérez"
slug: luis-perez
aliases: []
sexo: hombre
fecha_nacimiento: null
dni: null
telefono: null
email: null
direccion: null
pais: null
relaciones: []
contexto: movil
fuente: "Xandikos UID abc-123"
tags: [persona, osint, movil]
---

## Notas
"""

# Minimal organization note.
ORG_MD = """---
tipo: organizacion
nombre: "Acme S.L."
slug: acme-sl
tags: [organizacion, osint]
---

## Notas
"""

# Document sub-note stored under a person's folder; per its own body text it
# must NOT be counted as a person note.
DOC_MD = """---
tipo: documento
doc_tipo: dni
---

Sub-nota de documento (NO debe contar como ficha de persona).
"""
|
||||
|
||||
|
||||
@pytest.fixture()
def cfg(tmp_path):
    """Build a throwaway vault of fixture notes plus a migrated temporary DuckDB file.

    Returns the ``Config`` pointing at both, so tests never touch real data.
    """
    vault = tmp_path / "vault"
    personas_dir = vault / "personas"
    ana_dir = personas_dir / "ana-garcia-perez"
    orgs_dir = vault / "organizaciones"

    # parents=True also creates the vault root and personas/ on the way down.
    ana_dir.mkdir(parents=True)
    orgs_dir.mkdir()

    fixture_notes = [
        (personas_dir / "ana-garcia-perez.md", PERSONA_MD),
        (personas_dir / "luis-perez.md", PERSONA2_MD),
        # Underscore-prefixed template and nested document sub-note: the ingest
        # test expects neither of them to be counted as a person.
        (personas_dir / "_plantilla.md", "---\ntipo: plantilla\n---\n"),
        (ana_dir / "dni.md", DOC_MD),
        (orgs_dir / "acme-sl.md", ORG_MD),
    ]
    for note_file, text in fixture_notes:
        note_file.write_text(text, encoding="utf-8")

    database_file = tmp_path / "data" / "osint.duckdb"
    settings = Config(
        vault_dir=str(vault),
        db_path=str(database_file),
        port=0,
    )
    apply_migrations(settings.db_path)
    return settings
|
||||
|
||||
|
||||
@pytest.fixture()
def client(cfg):
    """Return a ``TestClient`` wrapping an app created from the ``cfg`` fixture."""
    app = create_app(cfg)
    return TestClient(app)
|
||||
|
||||
|
||||
def test_migrations_son_idempotentes(cfg):
    """A second migration pass applies nothing (tracked via the ``_migrations`` table)."""
    # The ``cfg`` fixture already ran the migrations once.
    second_pass = apply_migrations(cfg.db_path)
    assert second_pass == []
|
||||
|
||||
|
||||
def test_health(client, cfg):
    """The health endpoint reports ``ok``, the configured DB path and at least 8 tables."""
    response = client.get("/api/health")
    payload = response.json()
    assert payload["status"] == "ok"
    assert payload["db_path"] == cfg.db_path
    assert payload["tables"] >= 8
|
||||
|
||||
|
||||
def test_ingest_vault_cuenta_entidades(client):
    """Vault ingest reports the expected entity counts and rebuilt derived tables."""
    summary = client.post("/api/ingest/vault").json()
    assert summary["status"] == "ok"

    expected_counts = {
        # Five notes: 2 persons + template + document sub-note + organization.
        "notes": 5,
        # Only top-level notes without a leading underscore count as persons.
        "persons": 2,
        "organizations": 1,
        "domains": 0,
    }
    reported_counts = {key: summary[key] for key in expected_counts}
    assert reported_counts == expected_counts

    rebuilt = sorted(summary["derived_rebuilt"])
    assert rebuilt == [
        "derived.contact_link_quality",
        "derived.event_monthly",
        "derived.person_stats",
    ]
|
||||
|
||||
|
||||
def test_ingest_vault_extrae_dav_uid_de_fuente(client):
    """After ingest, ``luis-perez`` has ``dav_uid`` "abc-123" (the UID in its ``fuente``)."""
    client.post("/api/ingest/vault")

    lookup = {"sql": "SELECT dav_uid FROM persons WHERE slug = 'luis-perez'"}
    result = client.post("/api/query", json=lookup).json()

    assert result["status"] == "ok"
    first_row = result["rows"][0]
    assert first_row["dav_uid"] == "abc-123"
|
||||
|
||||
|
||||
def test_api_query_ok_y_error_siempre_http_200(client):
    """``/api/query`` answers HTTP 200 for both valid and failing SQL.

    Failures are signalled in the JSON body (``status == "error"`` plus a
    non-empty ``error``), not through the HTTP status code.
    """
    client.post("/api/ingest/vault")

    # Happy path: two persons, ordered by slug, well under max_rows.
    good_request = {
        "sql": "SELECT slug, nombre FROM persons ORDER BY slug",
        "max_rows": 10,
    }
    good_response = client.post("/api/query", json=good_request)
    assert good_response.status_code == 200
    payload = good_response.json()
    assert payload["status"] == "ok"
    assert payload["columns"] == ["slug", "nombre"]
    assert payload["row_count"] == 2
    assert payload["truncated"] is False
    assert payload["rows"][0]["slug"] == "ana-garcia-perez"

    # Failure path: unknown table still yields HTTP 200 with an error envelope.
    bad_request = {"sql": "SELECT * FROM tabla_que_no_existe"}
    bad_response = client.post("/api/query", json=bad_request)
    assert bad_response.status_code == 200
    failure = bad_response.json()
    assert failure["status"] == "error"
    assert failure["error"]
|
||||
|
||||
|
||||
def test_api_query_es_solo_lectura(client):
    """``/api/query`` rejects a write statement and leaves the data untouched.

    Checking only the error envelope would also pass if the DELETE ran and the
    error was reported afterwards, so the surviving rows are verified too.
    """
    client.post("/api/ingest/vault")

    rejected = client.post(
        "/api/query", json={"sql": "DELETE FROM persons"}
    ).json()
    assert rejected["status"] == "error"

    # Both fixture persons must still be present after the rejected DELETE.
    after = client.post(
        "/api/query", json={"sql": "SELECT slug FROM persons ORDER BY slug"}
    ).json()
    assert after["status"] == "ok"
    assert [row["slug"] for row in after["rows"]] == [
        "ana-garcia-perez",
        "luis-perez",
    ]
|
||||
|
||||
|
||||
def test_catalogo_de_queries_con_nombre(client):
    """The named-query catalogue lists the core queries, each with SQL and a description."""
    catalogue = client.get("/api/queries").json()
    assert catalogue["status"] == "ok"

    entries = catalogue["queries"]
    available = {entry["name"] for entry in entries}
    required = {
        "personas_por_contexto",
        "personas_recientes",
        "eventos_proximos",
        "contactos_sin_nota",
        "stats_personas",
    }
    # The catalogue may contain more queries; only these are mandatory.
    assert required.issubset(available)

    # Every entry must carry non-empty SQL and a non-empty description.
    assert all(entry["sql"] and entry["description"] for entry in entries)
|
||||
|
||||
|
||||
def test_query_named_ok_y_desconocida(client):
    """A known named query returns per-context counts; an unknown name yields an error."""
    client.post("/api/ingest/vault")
    endpoint = "/api/query/named"

    known = client.post(endpoint, json={"name": "personas_por_contexto"}).json()
    assert known["status"] == "ok"
    per_context = {}
    for row in known["rows"]:
        per_context[row["contexto"]] = row["personas"]
    # One fixture person per context.
    assert per_context == {"familia": 1, "movil": 1}

    unknown = client.post(endpoint, json={"name": "no_existe"}).json()
    assert unknown["status"] == "error"
|
||||
|
||||
|
||||
def test_tables_inventario(client):
    """``/api/tables`` lists master and derived tables and omits ``main._migrations``."""
    client.post("/api/ingest/vault")
    inventory = client.get("/api/tables").json()
    assert inventory["status"] == "ok"

    # Index the inventory by (schema, table name) for direct lookups.
    index = {}
    for table in inventory["tables"]:
        index[(table["schema"], table["name"])] = table

    persons_entry = index[("main", "persons")]
    assert persons_entry["kind"] == "master"
    assert persons_entry["row_count"] == 2
    assert {"name": "note_path", "type": "VARCHAR"} in persons_entry["columns"]

    stats_entry = index[("derived", "person_stats")]
    assert stats_entry["kind"] == "derived"

    # The migration bookkeeping table must not appear in the inventory.
    assert ("main", "_migrations") not in index
|
||||
|
||||
|
||||
def test_derivadas_sin_note_path(client):
    """Hard rule: no table in the ``derived`` schema references notes."""
    client.post("/api/ingest/vault")

    note_columns_sql = (
        "SELECT table_name, column_name FROM information_schema.columns "
        "WHERE table_schema = 'derived' AND column_name LIKE '%note%'"
    )
    note_columns = client.post("/api/query", json={"sql": note_columns_sql}).json()
    assert note_columns["status"] == "ok"
    assert note_columns["rows"] == []

    # And the three derived tables really exist, so the check above is not
    # passing merely because the schema is empty.
    derived_tables_sql = (
        "SELECT table_name FROM information_schema.tables "
        "WHERE table_schema = 'derived' ORDER BY table_name"
    )
    derived_tables = client.post("/api/query", json={"sql": derived_tables_sql}).json()
    table_names = [row["table_name"] for row in derived_tables["rows"]]
    assert table_names == [
        "contact_link_quality",
        "event_monthly",
        "person_stats",
    ]
|
||||
|
||||
|
||||
def test_link_contacts_por_telefono(client, cfg):
    """A contact whose phone matches a person note gets linked on re-ingest."""
    client.post("/api/ingest/vault")
    stamp = datetime.now(tz=timezone.utc)

    # Only the uid, etag, label and phone list differ between the two contacts.
    # The first phone matches Ana's note; the second matches nobody.
    # NOTE(review): values are positional and follow the contacts table column
    # order, which is not visible from this file.
    contact_rows = [
        ("uid-movil-1", "etag1", "Ana G.", '["600111222"]'),
        ("uid-movil-2", "etag2", "Desconocido", '["699999999"]'),
    ]
    with write_conn(cfg.db_path) as conn:
        for uid, etag, label, phones in contact_rows:
            conn.execute(
                "INSERT INTO contacts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                [
                    uid,
                    "/enmanuel/contacts/addressbook/",
                    etag,
                    label,
                    phones,
                    "[]",
                    "BEGIN:VCARD...",
                    None,
                    stamp,
                ],
            )

    # Vault ingest re-links contacts and rebuilds the derived tables.
    client.post("/api/ingest/vault")

    listing = client.post(
        "/api/query",
        json={"sql": "SELECT uid, note_path FROM contacts ORDER BY uid"},
    ).json()
    note_by_uid = {}
    for row in listing["rows"]:
        note_by_uid[row["uid"]] = row["note_path"]
    assert note_by_uid["uid-movil-1"] == os.path.join("personas", "ana-garcia-perez.md")
    assert note_by_uid["uid-movil-2"] is None

    unlinked = client.post(
        "/api/query/named", json={"name": "contactos_sin_nota"}
    ).json()
    unlinked_uids = [row["uid"] for row in unlinked["rows"]]
    assert unlinked_uids == ["uid-movil-2"]

    link_quality = client.post(
        "/api/query/named", json={"name": "calidad_enlace_contactos"}
    ).json()
    assert link_quality["rows"] == [{"total": 2, "linked": 1, "unlinked": 1}]
|
||||
|
||||
|
||||
def test_render_note_crea_bloque_sentinel_y_es_idempotente(client, cfg):
    """Rendering a query into a note writes one sentinel-delimited block, idempotently.

    The first render must create the note with begin/end sentinels, a heading
    and a markdown table; a second identical render must not duplicate them.
    """
    client.post("/api/ingest/vault")
    body = {
        "note_path": "tableros/personas.md",
        "block_id": "personas",
        "query": "personas_por_contexto",
        "title": "Personas por contexto",
    }
    note_file = os.path.join(cfg.vault_dir, "tableros", "personas.md")

    def read_note():
        # Read through a context manager so the handle is closed
        # deterministically instead of leaking until garbage collection.
        with open(note_file, encoding="utf-8") as handle:
            return handle.read()

    r = client.post("/api/render/note", json=body).json()
    assert r["status"] == "ok"
    assert r["note_path"] == "tableros/personas.md"
    assert r["rows_rendered"] == 2

    content = read_note()
    assert "<!-- osintdb:begin id=personas -->" in content
    assert "<!-- osintdb:end id=personas -->" in content
    assert "### Personas por contexto" in content
    assert "| contexto | personas |" in content
    assert "| familia | 1 |" in content

    # Idempotent: a second render duplicates neither the block nor the table.
    r2 = client.post("/api/render/note", json=body).json()
    assert r2["status"] == "ok"
    content2 = read_note()
    assert content2.count("<!-- osintdb:begin id=personas -->") == 1
    assert content2.count("| familia | 1 |") == 1
|
||||
|
||||
|
||||
def test_render_note_valida_inputs(client):
    """``/api/render/note`` reports an error for each kind of invalid request."""
    client.post("/api/ingest/vault")
    endpoint = "/api/render/note"

    # Neither "sql" nor "query" supplied.
    no_source = {"note_path": "t.md", "block_id": "x"}
    outcome = client.post(endpoint, json=no_source).json()
    assert outcome["status"] == "error"

    # Unknown named query.
    unknown_query = {"note_path": "t.md", "block_id": "x", "query": "nope"}
    outcome = client.post(endpoint, json=unknown_query).json()
    assert outcome["status"] == "error"

    # Path traversal outside the vault.
    traversal = {
        "note_path": "../fuera.md",
        "block_id": "x",
        "query": "stats_personas",
    }
    outcome = client.post(endpoint, json=traversal).json()
    assert outcome["status"] == "error"
    assert "fuera del vault" in outcome["error"]
|
||||
Reference in New Issue
Block a user