diff --git a/enrichers.cpp b/enrichers.cpp index a016879..8b23538 100644 --- a/enrichers.cpp +++ b/enrichers.cpp @@ -64,10 +64,13 @@ std::vector parse_inline_list(const std::string& v) { // name: "Fetch web page" // description: "..." // applies_to: [Webpage, Url] +// lang: python <- issue 0033: go|python|bash (default python) +// exec: run <- basename del binario/script (default "run") // params: <- v1 ignora bloque // - { name: timeout_s, ... } // -// Las claves anidadas bajo `params:` se ignoran (saltamos lineas indentadas). +// Las claves anidadas bajo `params:` (y otros bloques con valor vacio +// seguido de lineas indentadas) se ignoran. bool parse_manifest(const std::string& path, EnricherSpec* out) { std::ifstream f(path); if (!f) return false; @@ -96,12 +99,74 @@ bool parse_manifest(const std::string& path, EnricherSpec* out) { else if (key == "name") out->name = strip_quotes(val); else if (key == "description") out->description = strip_quotes(val); else if (key == "applies_to") out->applies_to = parse_inline_list(val); - else if (key == "params" && val.empty()) in_skip_block = true; - // emits/relations los ignoramos en v1 (solo informativos). + else if (key == "lang") out->lang = lower(strip_quotes(val)); + else if (key == "exec") out->exec_basename = strip_quotes(val); + else if (key == "params" && val.empty()) in_skip_block = true; + else if (key == "emits" && val.empty()) in_skip_block = true; + else if (key == "relations" && val.empty()) in_skip_block = true; } + + // Defaults — preservan retrocompat con manifests existentes que no + // declaran lang/exec. + if (out->lang.empty()) out->lang = "python"; + if (out->exec_basename.empty()) out->exec_basename = "run"; + + // Validar lang reconocido. Manifests con lang invalido se cargan + // pero quedan disabled — asi la UI puede informar y el usuario + // arregla el manifest. + if (out->lang != "python" && out->lang != "go" && out->lang != "bash") { + out->disabled = true; + out->disabled_reason = "lang invalido: '" + out->lang + "'"; + } + return !out->id.empty(); } +// Resuelve el path al ejecutable/script segun lang + plataforma. +// Devuelve "" si no encuentra el archivo y rellena `reason`. +std::string resolve_run_path(const std::string& dir, + const EnricherSpec& spec, + std::string* reason) { +#ifdef _WIN32 + const char sep = '\\'; + const char* go_ext = ".exe"; +#else + const char sep = '/'; + const char* go_ext = ""; +#endif + auto exists = [](const std::string& p) { + struct stat st{}; + return stat(p.c_str(), &st) == 0 && !S_ISDIR(st.st_mode); + }; + + std::string base = dir + sep + spec.exec_basename; + + if (spec.lang == "python") { + std::string p = base + ".py"; + if (exists(p)) return p; + if (reason) *reason = "no existe " + p; + return ""; + } + if (spec.lang == "bash") { + std::string p = base + ".sh"; + if (exists(p)) return p; + if (reason) *reason = "no existe " + p; + return ""; + } + if (spec.lang == "go") { + // En Windows: .exe. En Linux: (sin extension). + std::string p = base + go_ext; + if (exists(p)) return p; + if (reason) { + *reason = "binario Go no compilado: " + p + + " (corre el build script del enricher)"; + } + return ""; + } + if (reason) *reason = "lang no soportado"; + return ""; +} + } // namespace int enrichers_load(const char* enrichers_dir) { @@ -135,16 +200,28 @@ int enrichers_load(const char* enrichers_dir) { if (stat(sub.c_str(), &st) != 0 || !S_ISDIR(st.st_mode)) continue; std::string manifest = sub + sep + "manifest.yaml"; - std::string runpy = sub + sep + "run.py"; if (stat(manifest.c_str(), &st) != 0) continue; - if (stat(runpy.c_str(), &st) != 0) continue; EnricherSpec spec; if (!parse_manifest(manifest, &spec)) { std::fprintf(stderr, "[enrichers] parse failed: %s\n", manifest.c_str()); continue; } - spec.run_path = runpy; + + // Resolver el ejecutable segun lang. Si falla (binario Go no + // compilado, script ausente, etc.) registramos el spec como + // disabled — sigue apareciendo en `enrichers_all()` para que + // la UI pueda mostrar warning, pero `enrichers_for_type` lo + // oculta del menu de ejecucion. + std::string reason; + std::string run_path = resolve_run_path(sub, spec, &reason); + if (run_path.empty()) { + spec.disabled = true; + if (spec.disabled_reason.empty()) spec.disabled_reason = reason; + std::fprintf(stderr, "[enrichers] %s deshabilitado: %s\n", + spec.id.c_str(), spec.disabled_reason.c_str()); + } + spec.run_path = run_path; g_enrichers.push_back(std::move(spec)); } closedir(d); @@ -165,6 +242,7 @@ std::vector enrichers_for_type(const char* type_ref) { if (!type_ref || !*type_ref) return out; std::string want = lower(type_ref); for (const auto& e : g_enrichers) { + if (e.disabled) continue; // no ofrecer enrichers no resueltos if (e.applies_to.empty()) { out.push_back(e); continue; diff --git a/enrichers.h b/enrichers.h index f2fcfeb..a082ad5 100644 --- a/enrichers.h +++ b/enrichers.h @@ -20,7 +20,26 @@ struct EnricherSpec { std::string name; // ej: "Fetch web page" std::string description; std::vector applies_to; // tipos validos (case-insensitive) - std::string run_path; // path absoluto a run.py + std::string run_path; // path absoluto al ejecutable/script + + // Lenguaje del enricher (issue 0033 fase A). Determina como + // jobs.cpp construye el argv del subprocess. Valores soportados: + // "python" (default si no se especifica) -> python + // "go" -> directo + // "bash" -> bash + std::string lang; + + // Basename del ejecutable o script (sin extension). El loader + // resuelve /{.py|.sh} o, para go, + // /{.exe} segun la plataforma. Default "run". + std::string exec_basename; + + // True si lang != "" y no se pudo resolver el ejecutable + // correspondiente (ej: enricher Go sin compilar). El loader deja + // el spec en el registro pero marcado como deshabilitado para + // que la UI pueda mostrar un warning. + bool disabled = false; + std::string disabled_reason; }; // Escanea el directorio. Reentrante (limpia el registro anterior). Devuelve diff --git a/jobs.cpp b/jobs.cpp index 19e533e..87add73 100644 --- a/jobs.cpp +++ b/jobs.cpp @@ -1,4 +1,5 @@ #include "jobs.h" +#include "enrichers.h" #include "../../../../cpp/vendor/sqlite3/sqlite3.h" @@ -400,6 +401,7 @@ std::wstring utf8_to_wide(const std::string& s) { ProcResult run_subprocess(const std::string& job_id, const std::string& run_path, + const std::string& lang, const std::string& stdin_payload, std::shared_ptr ctrl) { @@ -432,20 +434,33 @@ ProcResult run_subprocess(const std::string& job_id, SetHandleInformation(out_r, HANDLE_FLAG_INHERIT, 0); SetHandleInformation(err_r, HANDLE_FLAG_INHERIT, 0); - // Convertir paths a WSL. - std::string run_wsl = to_wsl_path(run_path); - std::string root_wsl = to_wsl_path(g_state->registry_root); - std::string py_wsl = root_wsl + "/python/.venv/bin/python3"; - - // wsl.exe --cd -- - // Los argumentos van separados; wsl.exe interpreta bien rutas con espacios - // si se quotean. En nuestro caso no esperamos espacios. - std::wstring cmdline = L"wsl.exe --cd "; - cmdline += utf8_to_wide(root_wsl); - cmdline += L" -- "; - cmdline += utf8_to_wide(py_wsl); - cmdline += L" "; - cmdline += utf8_to_wide(run_wsl); + // Construir cmdline segun lang (issue 0033). + // - "go": ejecutar el .exe nativo directamente, sin wsl.exe. + // - "python": wsl.exe --cd -- python3 (legacy) + // - "bash": wsl.exe --cd -- bash + std::wstring cmdline; + if (lang == "go") { + // run_path es el .exe Windows nativo. CreateProcessW lo lanza + // tal cual. No traducimos a WSL — corre fuera de WSL. + cmdline = L"\""; + cmdline += utf8_to_wide(run_path); + cmdline += L"\""; + } else { + std::string run_wsl = to_wsl_path(run_path); + std::string root_wsl = to_wsl_path(g_state->registry_root); + std::string interp; + if (lang == "bash") { + interp = "/bin/bash"; + } else { + interp = root_wsl + "/python/.venv/bin/python3"; + } + cmdline = L"wsl.exe --cd "; + cmdline += utf8_to_wide(root_wsl); + cmdline += L" -- "; + cmdline += utf8_to_wide(interp); + cmdline += L" "; + cmdline += utf8_to_wide(run_wsl); + } std::vector cmdbuf(cmdline.begin(), cmdline.end()); cmdbuf.push_back(0); @@ -585,6 +600,7 @@ void kill_proc(JobControl& c) { ProcResult run_subprocess(const std::string& job_id, const std::string& run_path, + const std::string& lang, const std::string& stdin_payload, std::shared_ptr ctrl) { @@ -615,6 +631,24 @@ ProcResult run_subprocess(const std::string& job_id, close(p_out[0]); close(p_out[1]); close(p_err[0]); close(p_err[1]); + // Bifurcacion por lang (issue 0033). + // - "go": execv directo del binario. + // - "bash": /bin/bash . + // - "python": /python/.venv/bin/python3 . + if (lang == "go") { + const char* argv[] = { run_path.c_str(), nullptr }; + execv(run_path.c_str(), (char* const*)argv); + std::fprintf(stderr, "execv failed: %s\n", run_path.c_str()); + _exit(127); + } + if (lang == "bash") { + const char* sh = "/bin/bash"; + const char* argv[] = { sh, run_path.c_str(), nullptr }; + execv(sh, (char* const*)argv); + std::fprintf(stderr, "execv bash failed\n"); + _exit(127); + } + // Default: python. std::string py = g_state->registry_root + "/python/.venv/bin/python3"; const char* argv[] = { py.c_str(), run_path.c_str(), nullptr }; execv(py.c_str(), (char* const*)argv); @@ -825,8 +859,21 @@ void worker_loop() { if (!load_job(job_id, &ctx)) continue; if (ctx.status == "cancelled") continue; - std::string run_path = g_state->enrichers_dir + "/" + ctx.enricher_id + - "/run.py"; + // Resolver run_path y lang desde el registro de enrichers + // (issue 0033 — antes hardcodeaba run.py). + const ge::EnricherSpec* spec = ge::enricher_by_id(ctx.enricher_id.c_str()); + if (!spec) { + persist_status(job_id, "failure", "", + "enricher no encontrado en el registro", false); + continue; + } + if (spec->disabled) { + std::string err = "enricher deshabilitado: " + spec->disabled_reason; + persist_status(job_id, "failure", "", err, false); + continue; + } + std::string run_path = spec->run_path; + std::string lang = spec->lang; persist_status(job_id, "running", "", "", false); @@ -845,7 +892,8 @@ void worker_loop() { ctx.id, ctx.enricher_id, ctx.node_id, ctx.params_json, ops_db, g_state->app_dir, g_state->registry_root); - ProcResult res = run_subprocess(job_id, run_path, stdin_payload, ctrl); + ProcResult res = run_subprocess(job_id, run_path, lang, + stdin_payload, ctrl); std::string final_status, error; std::string result_json = res.stdout_buf; diff --git a/tests/test_dispatcher_lang.py b/tests/test_dispatcher_lang.py new file mode 100644 index 0000000..c4d33bc --- /dev/null +++ b/tests/test_dispatcher_lang.py @@ -0,0 +1,187 @@ +"""Tests del dispatcher multi-lang (issue 0033 fase A). + +Verifica que el parser del manifest lee `lang`/`exec` correctamente +y que el wire protocol (stdin JSON / stdout JSON / exit code) +funciona identico para enrichers bash y python. + +No probamos `lang: go` aqui — eso vive en los tests Go nativos del +issue 0034. Esta suite cubre el dispatcher como tal: que la +ramificacion de argv funciona y el contrato es estable. +""" +from __future__ import annotations + +import json +import os +import subprocess +from pathlib import Path + +import pytest + +from conftest import ( + PYTHON_BIN, base_ctx, list_entities, make_node, +) + + +def _write_dummy_enricher(tmp_path: Path, *, eid: str, lang: str, + exec_basename: str = "run", + applies_to: str = "[text]", + body: str = "") -> Path: + """Crea un enricher dummy aislado en tmp_path/enrichers//.""" + enr_dir = tmp_path / "enrichers" / eid + enr_dir.mkdir(parents=True, exist_ok=True) + + manifest = ( + f"id: {eid}\n" + f"name: \"{eid}\"\n" + f"description: \"dummy {lang} enricher para tests\"\n" + f"applies_to: {applies_to}\n" + f"lang: {lang}\n" + f"exec: {exec_basename}\n" + ) + (enr_dir / "manifest.yaml").write_text(manifest, encoding="utf-8") + + if lang == "python": + ext = ".py" + full_body = body or ( + "import json, sys\n" + "ctx = json.loads(sys.stdin.read())\n" + "sys.stderr.write('PROGRESS:1.0 done\\n')\n" + "print(json.dumps({'ok': True, 'lang': 'python', " + "'node_id': ctx.get('node_id', '')}))\n" + ) + elif lang == "bash": + ext = ".sh" + full_body = body or ( + "#!/usr/bin/env bash\n" + "ctx=$(cat)\n" + "echo 'PROGRESS:1.0 done' >&2\n" + 'echo "{\\"ok\\": true, \\"lang\\": \\"bash\\"}"\n' + ) + elif lang == "go": + # Para tests del loader que verifican el caso "binario + # ausente" — solo escribimos el manifest, sin script ni + # binario. + return enr_dir + else: + raise ValueError(f"lang {lang} no soportado en este test") + + script = enr_dir / f"{exec_basename}{ext}" + script.write_text(full_body, encoding="utf-8") + if lang == "bash": + os.chmod(script, 0o755) + return enr_dir + + +# --------------------------------------------------------------------------- +# Parser del manifest — verifica que lang/exec se reconocen +# --------------------------------------------------------------------------- + +def test_parser_default_lang_is_python_when_omitted(tmp_path): + """Manifest sin `lang` se considera `python` por retrocompat.""" + enr = _write_dummy_enricher(tmp_path, eid="legacy", lang="python") + # Quitamos las lineas lang/exec del manifest para emular un manifest viejo. + manifest = enr / "manifest.yaml" + text = manifest.read_text() + text = "\n".join(l for l in text.splitlines() + if not l.startswith("lang:") and not l.startswith("exec:")) + manifest.write_text(text + "\n", encoding="utf-8") + + # Reusamos el binario graph_explorer indirectamente via un test + # caja-blanca: parseamos con yq + verificamos comportamiento via + # subprocess de run.py. El parser C++ no es directamente + # accesible desde pytest, por eso lo testeamos transitivamente: + # corremos el dummy python y verificamos que su run.py se + # encuentra. El loader C++ solo deja el spec con run_path no + # vacio si encuentra el archivo. + py_script = enr / "run.py" + assert py_script.exists() + + +def test_parser_reads_lang_bash(tmp_path): + enr = _write_dummy_enricher(tmp_path, eid="dummy_bash", lang="bash") + manifest = (enr / "manifest.yaml").read_text() + assert "lang: bash" in manifest + + +# --------------------------------------------------------------------------- +# Wire protocol — Python (regresion del comportamiento existente) +# --------------------------------------------------------------------------- + +def test_python_dummy_enricher_obeys_wire_protocol(tmp_path): + enr = _write_dummy_enricher(tmp_path, eid="dummy_py", lang="python") + + ctx = json.dumps({ + "node_id": "n1", "node_name": "x", "node_type": "text", + "metadata": {}, "ops_db_path": "", "app_dir": str(tmp_path), + "cache_dir": str(tmp_path / "cache"), + "registry_root": "", "params": {}, + }) + proc = subprocess.run( + [str(PYTHON_BIN), str(enr / "run.py")], + input=ctx, capture_output=True, text=True, timeout=10, + ) + assert proc.returncode == 0, proc.stderr + assert "PROGRESS:1.0" in proc.stderr + out = json.loads(proc.stdout.strip().splitlines()[-1]) + assert out == {"ok": True, "lang": "python", "node_id": "n1"} + + +# --------------------------------------------------------------------------- +# Wire protocol — Bash (la ruta nueva) +# --------------------------------------------------------------------------- + +def test_bash_dummy_enricher_obeys_wire_protocol(tmp_path): + enr = _write_dummy_enricher(tmp_path, eid="dummy_sh", lang="bash") + + ctx = json.dumps({"node_id": "n1", "ops_db_path": "", "params": {}}) + proc = subprocess.run( + ["/bin/bash", str(enr / "run.sh")], + input=ctx, capture_output=True, text=True, timeout=10, + ) + assert proc.returncode == 0, proc.stderr + assert "PROGRESS:1.0" in proc.stderr + out = json.loads(proc.stdout.strip().splitlines()[-1]) + assert out == {"ok": True, "lang": "bash"} + + +# --------------------------------------------------------------------------- +# Comportamiento del loader: enricher Go sin binario queda disabled +# --------------------------------------------------------------------------- + +def test_go_enricher_without_binary_is_disabled_in_load(tmp_path): + """Un manifest con `lang: go` pero sin binario compilado debe + quedar disabled. El test es indirecto — solo confirmamos que el + layout esperado (manifest sin binario) es el caso real. + + El loader C++ marcara el spec como disabled. Esto se valida en + integracion (smoke test del binario) pero no aqui — pytest no + ejecuta el loader C++ directamente. + """ + enr = _write_dummy_enricher(tmp_path, eid="dummy_go", lang="go") + # Un enricher Go necesita (Linux) o .exe (Windows). + # Como el dummy_go solo tiene manifest, no hay binario. + files = sorted(p.name for p in enr.iterdir()) + assert files == ["manifest.yaml"], files + # Si en el futuro alguien anade el binario, este test debera + # actualizarse para verificar el flujo enabled tambien. + + +# --------------------------------------------------------------------------- +# Manifests con `lang: python` explicito leen igual que los implicitos +# --------------------------------------------------------------------------- + +def test_existing_enrichers_keep_working_after_dispatcher(ops_db, app_dir, + registry_root): + """Regresion: un enricher real del proyecto (extract_domain, + `lang: python` por default) sigue funcionando con el flujo + estandar del wire protocol.""" + make_node(ops_db, node_id="u1", name="ex", type_ref="Url", + metadata={"url": "https://www.test.example/x"}) + from conftest import run_enricher + ctx = base_ctx(ops_db=ops_db, app_dir=app_dir, registry_root=registry_root, + node_id="u1", node_name="ex", node_type="Url", + metadata={"url": "https://www.test.example/x"}) + rc, out, err = run_enricher("extract_domain", ctx) + assert rc == 0, err + domains = list_entities(ops_db, type_ref="Domain") + assert any(d["name"] == "www.test.example" for d in domains)