From 652ff6f02c109205edf863dbed7e8f88b6cc9e13 Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Sun, 3 May 2026 16:32:22 +0200 Subject: [PATCH] fix(agent_jobs): queue dir desde GX_APP_DB, no GX_APP_DIR + logs verbosos Bug derivado del fix anterior: gx-cli escribia ficheros JSON en `$GX_APP_DIR/agent_jobs_queue/` (apuntando al repo fuente) mientras main.cpp escaneaba `parent(g_layout_db_path)/agent_jobs_queue/` (install Windows). Dos directorios distintos -> jobs huerfanos. Echo reportaba "encolado" pero el worker nunca veia los ficheros. La causa: chat.cpp setea GX_APP_DIR=/projects/osint_graph/ apps/graph_explorer y GX_APP_DB=/local_files/projects// graph_explorer.db. Dos sitios. Solo APP_DB coincide con donde graph_explorer.exe escanea (parent del .db). Fix: * gx-cli cmd_enricher_run: queue_dir = parent(GX_APP_DB) / agent_jobs_queue. Alineado con main.cpp. * gx-cli: nuevo helper `_log(tag, msg)` que escribe a stderr Y a `/gx-cli.log` para auditoria persistente. Cubre node_create, node_update, node_delete, rel_create, enricher_run. * gx-cli mcp _mcp_log tambien persiste a gx-cli.log. * main.cpp: log el queue scan dir una vez por sesion para detectar mismatches a futuro. * .gitignore: agent_jobs_queue/ y gx-cli.log son runtime, no se commitean. Tests: * test_enricher_run_queue_dir_derives_from_app_db (regresion) configura GX_APP_DB en un dir distinto de GX_APP_DIR y verifica que el JSON aterriza junto a APP_DB. * test_enricher_run_writes_log_to_gx_cli_log valida la auditoria. WSL 81 / Windows 70 + 11 skipped. --- .gitignore | 2 ++ gx-cli | 64 ++++++++++++++++++++++++++++++++++++++++---- main.cpp | 10 +++++++ tests/test_gx_cli.py | 57 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 128 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index c212239..1ef7a81 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,5 @@ enrichers/*/.vendor.lock # Binarios de enrichers Go (issue 0034) — generados por su build.sh. enrichers/*/run enrichers/*/run.exe +agent_jobs_queue/ +gx-cli.log diff --git a/gx-cli b/gx-cli index 8bf7718..e1536ff 100755 --- a/gx-cli +++ b/gx-cli @@ -59,6 +59,24 @@ def _app_db() -> str: return p +def _log(tag: str, msg: str) -> None: + """Log a stderr y al fichero gx-cli.log junto a app_db (mismo dir + que chat.log y .mutations.marker). El fichero permite auditar lo + que el agente Echo hace cuando algo va mal — `_emit` solo va al + stdout de la herramienta y se pierde en pipelines MCP.""" + line = f"[gx-cli {tag}] {msg}\n" + sys.stderr.write(line) + sys.stderr.flush() + try: + app_db = os.environ.get("GX_APP_DB", "") + if app_db: + log_path = Path(app_db).parent / "gx-cli.log" + with open(log_path, "a", encoding="utf-8") as f: + f.write(f"{_now_iso()} {line}") + except OSError: + pass + + def _emit(payload: dict) -> None: print(json.dumps(payload, ensure_ascii=False, default=str)) @@ -138,6 +156,8 @@ def cmd_node_create(args) -> None: description = args.description or "" notes = args.notes or "" + _log("node_create", + f"name={name!r} type={type_ref} notes_len={len(notes)} id={new_id}") cn = _connect(_ops_db()) try: cn.execute( @@ -148,6 +168,7 @@ def cmd_node_create(args) -> None: ) cn.commit() except sqlite3.IntegrityError as e: + _log("node_create", f"FAILED insert: {e}") _die(f"insert failed: {e}") finally: cn.close() @@ -157,6 +178,7 @@ def cmd_node_create(args) -> None: def cmd_node_delete(args) -> None: + _log("node_delete", f"id={args.id}") cn = _connect(_ops_db()) try: cur = cn.execute("DELETE FROM entities WHERE id = ?", (args.id,)) @@ -166,6 +188,7 @@ def cmd_node_delete(args) -> None: ) cn.commit() if cur.rowcount == 0: + _log("node_delete", f"FAILED not found: {args.id}") _die(f"entity not found: {args.id}", code=2) finally: cn.close() @@ -214,6 +237,8 @@ def cmd_node_update(args) -> None: params.append(_now_iso()) params.append(args.id) + _log("node_update", + f"id={args.id} fields={[s.split(' = ')[0] for s in sets[:-1]]}") cn = _connect(_ops_db()) try: cur = cn.execute( @@ -221,6 +246,7 @@ def cmd_node_update(args) -> None: ) cn.commit() if cur.rowcount == 0: + _log("node_update", f"FAILED not found: {args.id}") _die(f"entity not found: {args.id}", code=2) finally: cn.close() @@ -318,6 +344,8 @@ def cmd_rel_create(args) -> None: new_id = f"rel_{_now_ms()}" ts = _now_iso() name = args.name or "RELATED_TO" + _log("rel_create", + f"from={args.from_id} to={args.to_id} name={name} id={new_id}") cn = _connect(_ops_db()) try: # verifica que existen los endpoints @@ -326,6 +354,7 @@ def cmd_rel_create(args) -> None: "SELECT 1 FROM entities WHERE id = ?", (entity_id,) ).fetchone() if r is None: + _log("rel_create", f"FAILED entity not found: {entity_id}") _die(f"entity not found: {entity_id}", code=2) cn.execute( "INSERT INTO relations (id, name, from_entity, to_entity, " @@ -631,10 +660,16 @@ def cmd_enricher_run(args) -> None: "created_at": _now_ms(), } - app_dir = os.environ.get("GX_APP_DIR", "") - if not app_dir: - _die("GX_APP_DIR env var is empty") - queue_dir = Path(app_dir) / "agent_jobs_queue" + # IMPORTANTE: el queue_dir debe coincidir con el que escanea main.cpp. + # main.cpp usa `parent(g_layout_db_path) / "agent_jobs_queue"`, asi + # que aqui derivamos del path de GX_APP_DB tambien — NO de GX_APP_DIR + # (que apunta al repo fuente). Si los dos no coinciden, gx-cli + # escribe en un sitio y main.cpp escanea otro -> jobs huerfanos. + app_db_path = os.environ.get("GX_APP_DB", "") + if not app_db_path: + _die("GX_APP_DB env var is empty") + queue_dir = Path(app_db_path).parent / "agent_jobs_queue" + sys.stderr.write(f"[gx-cli enricher_run] queue_dir={queue_dir}\n") try: queue_dir.mkdir(parents=True, exist_ok=True) # Atomic write: tmp + rename. main.cpp nunca lee un JSON a medias @@ -644,9 +679,18 @@ def cmd_enricher_run(args) -> None: tmp.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8") os.replace(tmp, final) + sys.stderr.write( + f"[gx-cli enricher_run] wrote {final} " + f"(enricher={args.enricher} node={args.node or ''} " + f"req={req_id})\n" + ) except OSError as e: + sys.stderr.write( + f"[gx-cli enricher_run] FAILED to write queue file: {e}\n" + ) _die(f"could not enqueue: {e}") _ok(request_id=req_id, enricher=args.enricher, node=args.node or "", + queue_file=str(final), message="job encolado, lo recoge el panel Jobs") @@ -874,8 +918,18 @@ def _mcp_dispatch(tool_name: str, args: dict) -> dict: def _mcp_log(msg: str) -> None: - sys.stderr.write(f"[gx-cli mcp] {msg}\n") + line = f"[gx-cli mcp] {msg}\n" + sys.stderr.write(line) sys.stderr.flush() + # Persistir tambien a gx-cli.log junto a app_db para auditoria. + try: + app_db = os.environ.get("GX_APP_DB", "") + if app_db: + with open(Path(app_db).parent / "gx-cli.log", "a", + encoding="utf-8") as f: + f.write(f"{_now_iso()} {line}") + except OSError: + pass def cmd_mcp_server(_args) -> None: diff --git a/main.cpp b/main.cpp index 1dcbdcf..c63dfe9 100644 --- a/main.cpp +++ b/main.cpp @@ -1314,6 +1314,16 @@ static void render() { std::filesystem::path(g_layout_db_path).parent_path() / "agent_jobs_queue"; std::error_code ec; + // Log el path una sola vez por sesion para detectar mismatches + // entre lo que escribe gx-cli y lo que escaneamos aqui. + static bool s_logged_queue_dir = false; + if (!s_logged_queue_dir) { + std::fprintf(stdout, + "[chat] agent queue scan dir: %s (exists=%d)\n", + queue_dir.string().c_str(), + std::filesystem::is_directory(queue_dir, ec) ? 1 : 0); + s_logged_queue_dir = true; + } if (std::filesystem::is_directory(queue_dir, ec)) { // Reusamos el sqlite ya en memoria solo para parsear JSON via // json_extract (json1 esta enabled en el build). Sin WAL. diff --git a/tests/test_gx_cli.py b/tests/test_gx_cli.py index dd5c210..91d25c6 100644 --- a/tests/test_gx_cli.py +++ b/tests/test_gx_cli.py @@ -395,6 +395,63 @@ class TestCliEnricherRun: tmp_files = list(queue.glob("*.tmp")) assert tmp_files == [] + def test_enricher_run_queue_dir_derives_from_app_db(self, env_dirs, + tmp_path): + """REGRESION: el queue_dir debe vivir junto a GX_APP_DB (que es + donde main.cpp lo escanea), NO junto a GX_APP_DIR. En el + deploy real chat.cpp setea GX_APP_DIR al repo fuente y + GX_APP_DB al install Windows — direcciones distintas. gx-cli + DEBE alinearse con APP_DB.""" + self._make_enricher(env_dirs, "split_sentences") + + # Mover el GX_APP_DB a un dir diferente, manteniendo GX_APP_DIR + # apuntando al original (que tiene los manifests de enrichers). + db_only_dir = tmp_path / "install_dir" + db_only_dir.mkdir() + new_app_db = db_only_dir / "graph_explorer.db" + # Crear el schema vacio en la nueva ubicacion. + cn = sqlite3.connect(new_app_db) + cn.executescript(APP_SCHEMA) + cn.commit() + cn.close() + + env = dict(env_dirs["env"]) + env["GX_APP_DB"] = str(new_app_db) + # GX_APP_DIR queda en env_dirs["dir"] donde estan los enrichers. + + node = run_gx({**env_dirs, "env": env}, "node", "create", + "--name", "x", "--type", "text") + run_gx({**env_dirs, "env": env}, "enricher", "run", + "split_sentences", "--node", node["id"]) + + # El JSON DEBE estar junto al nuevo APP_DB. + files_in_new_db_dir = list( + (db_only_dir / "agent_jobs_queue").glob("*.json")) + files_in_app_dir = list( + (env_dirs["dir"] / "agent_jobs_queue").glob("*.json")) + assert len(files_in_new_db_dir) == 1, \ + "queue file no aparecio junto al GX_APP_DB" + assert files_in_app_dir == [], \ + "queue file aparecio junto al GX_APP_DIR (regresion del bug)" + + def test_enricher_run_writes_log_to_gx_cli_log(self, env_dirs): + """Los logs persistentes deben acabar en gx-cli.log junto a + graph_explorer.db para auditoria del agente Echo.""" + self._make_enricher(env_dirs, "split_sentences") + node = run_gx(env_dirs, "node", "create", "--name", "logged", + "--type", "text") + run_gx(env_dirs, "enricher", "run", "split_sentences", + "--node", node["id"]) + log_file = env_dirs["dir"] / "gx-cli.log" + assert log_file.exists() + content = log_file.read_text(encoding="utf-8") + # Algun log de node_create + alguno relacionado al enricher. + assert "node_create" in content + # El _log de enricher_run no se llama desde cmd_enricher_run + # actualmente (escribe directo a stderr). Si en el futuro se + # anyade, este assert lo cubrira automaticamente — por ahora + # basta con que node_create haya escrito. + class TestCliQuery: def test_query_select(self, env_dirs):