2a5127fcaf
El campo `notes` es lo que el usuario escribe en el panel Note del
Inspector (doble click sobre el nodo) — sitio canonico para texto
largo. Antes los enrichers leian metadata.text/description/query como
prioridad, dejando notes ignorado y forzando al usuario a inyectar
texto via la UI metadata-extra (poco descubrible).
Cambios:
- Ambos run.py abren la BD y leen `entities.notes` por SQL antes de
fallback a node_name. metadata.text/description/query ya no se
consultan (KISS — solo notes y name).
- conftest.make_node admite kwarg `notes` para inyectar contenido
en la columna notes desde tests.
- Tests actualizados: SAMPLE_TEXT y los IoC dumps van por `notes=`
en lugar de `metadata={"text": ...}`.
- Renombrado el test que verificaba prioridad: ahora se llama
`*_uses_notes_priority` y verifica notes > name.
Tests verdes WSL (44) y Windows (33 + 11 skipped).
334 lines
11 KiB
Python
334 lines
11 KiB
Python
"""Fixtures comunes para tests de enrichers de graph_explorer.
|
|
|
|
Cada test recibe:
|
|
- `ops_db`: path a una operations.db con schema minimo en tmp dir
|
|
- `app_dir`: tmp dir que actua como app_dir (cache_dir = <app_dir>/cache)
|
|
- `registry_root`: ruta absoluta del registry (para imports en run.py)
|
|
- `run_enricher(enricher, ctx_overrides)`: helper que invoca run.py via
|
|
subprocess con el mismo wire protocol que jobs.cpp.
|
|
|
|
El schema se replica de `fn_operations/project_template/operations.db` —
|
|
solo las columnas que usan los enrichers. Si fn_operations cambia el
|
|
schema, este conftest se actualiza.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import sqlite3
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
|
|
# Root of the graph_explorer app: the directory one level above the folder
# that contains this file.
APP_DIR_SRC = Path(__file__).resolve().parents[1]
# Directory that contains this conftest.
TESTS_DIR = Path(__file__).resolve().parent
# Stub modules that stub_requests() exposes to the enricher subprocess.
STUBS_DIR = TESTS_DIR / "_stubs"
|
|
|
|
# Los enrichers viven en `<app>/enrichers/` en el repo dev y en
|
|
# `<app>/assets/enrichers/` en la carpeta portable de Windows
|
|
# (convencion `assets/` desde el ADR de feb-2026). Detectar cual
|
|
# existe y usar ese.
|
|
def _resolve_enrichers_dir() -> Path:
    """Locate the directory that holds the enrichers.

    Two layouts are probed, in order: ``<app>/enrichers`` and
    ``<app>/assets/enrichers``. The first one that is an existing
    directory wins.

    Returns:
        The first existing layout, or ``<app>/enrichers`` when neither
        exists, so that later error messages point at the dev layout.
    """
    dev_layout = APP_DIR_SRC / "enrichers"
    portable_layout = APP_DIR_SRC / "assets" / "enrichers"
    existing = (d for d in (dev_layout, portable_layout) if d.is_dir())
    return next(existing, dev_layout)
|
|
|
|
|
|
# Enrichers directory, resolved once when this module is imported.
ENRICHERS_DIR = _resolve_enrichers_dir()
|
|
|
|
|
|
def _resolve_registry_root() -> Path:
    """Walk upward from the app directory looking for a registry marker.

    A directory counts as the registry root when it contains either
    ``cmd/fn/main.go`` or ``registry.db``. The app directory itself and
    its ancestors are inspected, at most eight directories in total.

    Returns:
        The first directory that holds a marker. When none is found
        (e.g. a standalone install with no registry around it) the app
        directory is returned as a pseudo-root; that is sufficient as
        long as no test imports packages from the registry.
    """
    # The app dir followed by its ancestors up to the filesystem root,
    # capped at eight levels.
    levels = [APP_DIR_SRC, *APP_DIR_SRC.parents][:8]
    for level in levels:
        markers = (level / "cmd" / "fn" / "main.go", level / "registry.db")
        if any(marker.exists() for marker in markers):
            return level
    return APP_DIR_SRC
|
|
|
|
|
|
# Registry root (or the app dir as fallback), resolved once at import time.
REGISTRY_ROOT = _resolve_registry_root()
|
|
|
|
|
|
def _resolve_python_bin() -> Path:
    """Pick the Python interpreter used to execute the enrichers.

    Lookup order:
      1. ``$FN_TEST_PYTHON``, when set and the path exists.
      2. ``<app>/assets/runtime/python/python.exe`` (Windows only).
      3. ``<app>/runtime/python/python.exe`` (Windows only).
      4. ``<registry>/python/.venv/bin/python3``.
      5. ``sys.executable`` (whatever is running pytest).

    The ``python.exe`` candidates are considered only when
    ``sys.platform`` starts with ``"win"``: the files may be present on
    other systems but cannot be executed there.

    Returns:
        Path to the first candidate that exists, else ``sys.executable``.
    """
    override = os.environ.get("FN_TEST_PYTHON")
    if override:
        override_path = Path(override)
        if override_path.exists():
            return override_path

    candidates: list[Path] = []
    if sys.platform.startswith("win"):
        candidates.append(
            APP_DIR_SRC / "assets" / "runtime" / "python" / "python.exe")
        candidates.append(APP_DIR_SRC / "runtime" / "python" / "python.exe")
    candidates.append(REGISTRY_ROOT / "python" / ".venv" / "bin" / "python3")

    found = next((c for c in candidates if c.exists()), None)
    return found if found is not None else Path(sys.executable)
|
|
|
|
|
|
# Interpreter used by run_enricher(), resolved once at import time.
PYTHON_BIN = _resolve_python_bin()
|
|
|
|
|
|
def stub_requests(tmp_path: Path, plan: dict) -> dict:
    """Write the response plan and return the env vars that enable the stub.

    The returned mapping offers two channels through which the stub
    directory can reach the subprocess import path:

    - ``PYTHONPATH``: the standard route; works on Linux and on a full
      (non-embedded) Python install.
    - ``_STUB_PATHS``: read by ``_runner.py``, which inserts it at the
      front of ``sys.path``. Needed for the embedded Windows Python,
      which ignores ``PYTHONPATH``.

    Args:
        tmp_path: Directory where ``_stub_plan.json`` is written.
        plan: JSON-serialisable plan; accepts ``default`` and/or
            ``match`` (a list of ``{contains, status, text}``).

    Returns:
        Dict with ``PYTHONPATH``, ``_STUB_PATHS`` and
        ``_STUB_REQUESTS_PLAN`` (path of the written plan file).
    """
    plan_file = tmp_path / "_stub_plan.json"
    plan_file.write_text(json.dumps(plan), encoding="utf-8")

    # Chain the inherited PYTHONPATH only when it is non-empty. A dangling
    # separator would add an empty entry, which the interpreter resolves
    # to the current working directory.
    search_path = [str(STUBS_DIR)]
    inherited = os.environ.get("PYTHONPATH", "")
    if inherited:
        search_path.append(inherited)

    return {
        "PYTHONPATH": os.pathsep.join(search_path),
        "_STUB_PATHS": str(STUBS_DIR),
        "_STUB_REQUESTS_PLAN": str(plan_file),
    }
|
|
|
|
|
|
SCHEMA_SQL = """
|
|
CREATE TABLE entities (
|
|
id TEXT PRIMARY KEY,
|
|
name TEXT NOT NULL,
|
|
type_ref TEXT NOT NULL,
|
|
status TEXT NOT NULL DEFAULT 'active',
|
|
description TEXT NOT NULL DEFAULT '',
|
|
domain TEXT NOT NULL DEFAULT '',
|
|
tags TEXT NOT NULL DEFAULT '[]',
|
|
source TEXT NOT NULL,
|
|
metadata TEXT NOT NULL DEFAULT '{}',
|
|
notes TEXT NOT NULL DEFAULT '',
|
|
group_id TEXT,
|
|
created_at TEXT NOT NULL,
|
|
updated_at TEXT NOT NULL
|
|
);
|
|
CREATE TABLE relations (
|
|
id TEXT PRIMARY KEY,
|
|
name TEXT NOT NULL,
|
|
from_entity TEXT NOT NULL DEFAULT '',
|
|
to_entity TEXT NOT NULL,
|
|
via TEXT NOT NULL DEFAULT '',
|
|
description TEXT NOT NULL DEFAULT '',
|
|
purity TEXT NOT NULL DEFAULT '',
|
|
direction TEXT NOT NULL DEFAULT 'unidirectional',
|
|
weight REAL,
|
|
status TEXT NOT NULL DEFAULT 'designed',
|
|
started_at TEXT,
|
|
ended_at TEXT,
|
|
"order" INTEGER,
|
|
tags TEXT NOT NULL DEFAULT '[]',
|
|
notes TEXT NOT NULL DEFAULT '',
|
|
created_at TEXT NOT NULL,
|
|
updated_at TEXT NOT NULL
|
|
);
|
|
"""
|
|
|
|
|
|
@pytest.fixture
def ops_db(tmp_path):
    """Create an empty operations.db with the minimal schema.

    Args:
        tmp_path: pytest's per-test temporary directory.

    Returns:
        Path to ``<tmp_path>/operations.db``, ready for node inserts.
    """
    db = tmp_path / "operations.db"
    conn = sqlite3.connect(db)
    try:
        conn.executescript(SCHEMA_SQL)
        conn.commit()
    finally:
        # Always release the handle, even if the schema script fails, so
        # the temporary file is not left open.
        conn.close()
    return db
|
|
|
|
|
|
@pytest.fixture
def app_dir(tmp_path):
    """Create the root directory of an 'app' for the enrichers.

    Builds ``<tmp_path>/app`` and, inside it, the ``cache`` directory.

    Returns:
        Path to the newly created ``app`` directory.
    """
    root = tmp_path / "app"
    cache = root / "cache"
    root.mkdir()
    cache.mkdir()
    return root
|
|
|
|
|
|
@pytest.fixture
def registry_root():
    """Expose the module-level ``REGISTRY_ROOT`` path as a fixture."""
    return REGISTRY_ROOT
|
|
|
|
|
|
def make_node(ops_db: Path, *, node_id: str, name: str, type_ref: str,
|
|
metadata: dict | None = None, source: str = "test",
|
|
notes: str = "") -> None:
|
|
"""Inserta un nodo de tipo arbitrario en operations.db.
|
|
|
|
`notes` se mapea a la columna `entities.notes` — es lo que el
|
|
panel Note del Inspector edita en la app real, y los enrichers
|
|
`split_sentences` / `extract_iocs_text` lo leen como fuente de
|
|
texto canonica.
|
|
"""
|
|
conn = sqlite3.connect(ops_db)
|
|
conn.execute(
|
|
"INSERT INTO entities (id, name, type_ref, source, metadata, "
|
|
" notes, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, "
|
|
" '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z')",
|
|
(node_id, name, type_ref, source,
|
|
json.dumps(metadata or {}, ensure_ascii=False), notes),
|
|
)
|
|
conn.commit()
|
|
conn.close()
|
|
|
|
|
|
def get_entity(ops_db: Path, entity_id: str) -> dict | None:
|
|
conn = sqlite3.connect(ops_db)
|
|
try:
|
|
cur = conn.execute(
|
|
"SELECT id, name, type_ref, source, metadata "
|
|
"FROM entities WHERE id=?", (entity_id,))
|
|
row = cur.fetchone()
|
|
finally:
|
|
conn.close()
|
|
if not row:
|
|
return None
|
|
md = {}
|
|
try:
|
|
md = json.loads(row[4]) if row[4] else {}
|
|
except Exception:
|
|
pass
|
|
return {"id": row[0], "name": row[1], "type_ref": row[2],
|
|
"source": row[3], "metadata": md}
|
|
|
|
|
|
def list_entities(ops_db: Path, type_ref: str | None = None) -> list[dict]:
|
|
conn = sqlite3.connect(ops_db)
|
|
try:
|
|
if type_ref:
|
|
cur = conn.execute(
|
|
"SELECT id, name, type_ref, source, metadata, group_id "
|
|
"FROM entities WHERE type_ref=? ORDER BY id", (type_ref,))
|
|
else:
|
|
cur = conn.execute(
|
|
"SELECT id, name, type_ref, source, metadata, group_id "
|
|
"FROM entities ORDER BY id")
|
|
rows = cur.fetchall()
|
|
finally:
|
|
conn.close()
|
|
out = []
|
|
for r in rows:
|
|
try:
|
|
md = json.loads(r[4]) if r[4] else {}
|
|
except Exception:
|
|
md = {}
|
|
out.append({"id": r[0], "name": r[1], "type_ref": r[2],
|
|
"source": r[3], "metadata": md, "group_id": r[5]})
|
|
return out
|
|
|
|
|
|
def list_relations(ops_db: Path, name: str | None = None) -> list[dict]:
|
|
conn = sqlite3.connect(ops_db)
|
|
try:
|
|
if name:
|
|
cur = conn.execute(
|
|
"SELECT id, name, from_entity, to_entity FROM relations "
|
|
"WHERE name=? ORDER BY id", (name,))
|
|
else:
|
|
cur = conn.execute(
|
|
"SELECT id, name, from_entity, to_entity FROM relations "
|
|
"ORDER BY id")
|
|
rows = cur.fetchall()
|
|
finally:
|
|
conn.close()
|
|
return [{"id": r[0], "name": r[1], "from_entity": r[2], "to_entity": r[3]}
|
|
for r in rows]
|
|
|
|
|
|
def run_enricher(enricher_id: str, ctx: dict, *, env: dict | None = None,
                 timeout: int = 30) -> tuple[int, dict | None, str]:
    """Run ``enrichers/<id>/run.py`` using the standard wire protocol.

    The script is always launched through the ``_runner.py`` trampoline
    so the requests stub can be injected both via ``PYTHONPATH`` (regular
    Python) and via ``_STUB_PATHS`` (embedded Windows Python, which
    ignores ``PYTHONPATH``).

    Args:
        enricher_id: Name of the enricher's folder under the enrichers dir.
        ctx: Context dict, sent to the subprocess as JSON on stdin.
        env: Extra environment variables layered over the current ones.
        timeout: Seconds to wait before the subprocess call times out.

    Returns:
        ``(exit_code, stdout_json_or_None, stderr_text)``. The JSON value
        is parsed from the last non-empty stdout line and is ``None``
        when stdout is blank or that line is not valid JSON.
    """
    run_py = ENRICHERS_DIR / enricher_id / "run.py"
    assert run_py.exists(), f"no existe {run_py}"
    runner = TESTS_DIR / "_runner.py"
    assert runner.exists(), f"no existe {runner}"

    # Current environment with the caller's overrides applied on top.
    child_env = {**os.environ, **(env or {})}

    command = [str(PYTHON_BIN), str(runner), str(run_py)]
    completed = subprocess.run(
        command,
        input=json.dumps(ctx),
        capture_output=True,
        text=True,
        timeout=timeout,
        env=child_env,
    )

    # The summary JSON is the last non-empty line of stdout.
    stripped = (ln.strip() for ln in completed.stdout.strip().splitlines())
    non_empty = [ln for ln in stripped if ln]
    summary: dict | None = None
    if non_empty:
        try:
            summary = json.loads(non_empty[-1])
        except Exception:
            summary = None
    return completed.returncode, summary, completed.stderr
|
|
|
|
|
|
def base_ctx(*, ops_db, app_dir, registry_root, node_id, node_name,
             node_type, metadata=None, params=None) -> dict:
    """Build the typical ctx dict that is sent to an enricher on stdin.

    Args:
        ops_db: Database path, stored as a string under ``ops_db_path``.
        app_dir: App directory; ``cache_dir`` is derived as
            ``<app_dir>/cache``.
        registry_root: Registry root, stored as a string.
        node_id: Identifier of the node.
        node_name: Name of the node.
        node_type: Type of the node.
        metadata: Optional metadata dict (``{}`` when falsy).
        params: Optional params dict (``{}`` when falsy).

    Returns:
        The assembled context dict.
    """
    ctx = dict(node_id=node_id, node_name=node_name, node_type=node_type)
    ctx["metadata"] = metadata if metadata else {}
    ctx["ops_db_path"] = str(ops_db)
    ctx["app_dir"] = str(app_dir)
    ctx["cache_dir"] = str(Path(app_dir) / "cache")
    ctx["registry_root"] = str(registry_root)
    ctx["params"] = params if params else {}
    return ctx
|