feat(jobs): runtime Python embebido + cadena de fallback (issue 0033 fase B)

Permite distribuir graph_explorer.exe Windows sin dependencia de WSL
ni del .venv del registry. Tambien funciona en Linux como bundle
autocontenido portable.

Cambios:

1. tools/freeze_python_runtime.sh
   - Linux: copia python-build-standalone (uv) ~87 MB,
     elimina marker EXTERNALLY-MANAGED, instala wheels.
   - Windows: descarga python-3.12.7-embed-amd64.zip oficial
     (~12 MB), habilita site-packages, instala wheels via
     pip install --target --platform win_amd64.
   - Idempotente via runtime/.lock con SHA256 del estado.
   - Lee python_runtime_deps del frontmatter de app.md.

2. jobs.cpp::cached_python_runtime() — resolver con cadena:
     1. <exe_dir>/runtime/python/{python.exe|bin/python3}  (embedded)
     2. $FN_PYTHON                                         (env)
     3. <registry_root>/python/.venv/bin/python3           (registry_venv)
     4. python3 del PATH                                   (system)
   Loggea procedencia al iniciar jobs_init.

3. POSIX run_subprocess: usa el runtime resuelto en lugar del
   path hardcodeado.

4. Windows run_subprocess: ramifica por needs_wsl. Si el runtime es
   embedded, env o system, lanza Python Windows nativo via
   CreateProcessW directamente (run_path tambien Windows nativo).
   Solo el legacy registry_venv sigue por wsl.exe.

5. app.md: nuevos campos python_runtime: true y
   python_runtime_deps: [requests, certifi, urllib3].

6. .gitignore extendido con runtime/, projects/, _vendored/,
   .vendor.lock, binarios Go de enrichers.

Tests: 26/26 verde — 16 originales + 6 dispatcher fase A + 4
nuevos del resolver fase B (con/sin embed, FN_PYTHON, idempotencia
del freeze script).

Smoke E2E manual: runtime/python/bin/python3 ejecuta web_search
con cwd /tmp y registry_root pasado en ctx, sin tocar el .venv del
registry.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-02 16:51:02 +02:00
parent 2238355f40
commit 30f6f3758f
6 changed files with 521 additions and 27 deletions
+155 -17
View File
@@ -15,6 +15,7 @@
#include <ctime>
#include <filesystem>
#include <memory>
#include <sys/stat.h>
#include <mutex>
#include <queue>
#include <sstream>
@@ -74,6 +75,116 @@ struct State {
State* g_state = nullptr;
// ============================================================================
// Python runtime resolver (issue 0033 fase B)
// ============================================================================
// Outcome of resolving the Python runtime: the interpreter path, a tag
// naming which step of the fallback chain produced it, and a flag that
// says whether the interpreter has to be launched through wsl.exe
// (only the Windows code path consults that flag).
struct PyRuntime {
    // Path to the Python executable (or a bare command name for "system").
    std::string path;
    // One of "embedded", "env", "registry_venv", "system"; empty until resolved.
    std::string kind;
    // True when `path` must be run via wsl.exe.
    bool needs_wsl{false};
};
// Returns the directory that contains the running executable, without a
// trailing separator. runtime/python/ is looked up relative to it.
// POSIX reads the /proc/self/exe symlink; Windows asks
// GetModuleFileNameW and converts the result to UTF-8.
// Returns "" when the path cannot be obtained, was truncated, or
// contains no directory separator.
std::string get_exe_dir() {
#ifdef _WIN32
    wchar_t wide_path[MAX_PATH * 2];
    const size_t capacity = sizeof(wide_path) / sizeof(wide_path[0]);
    const DWORD wide_len = GetModuleFileNameW(nullptr, wide_path, (DWORD)capacity);
    // 0 means failure; a length that fills the buffer means truncation.
    if (wide_len == 0 || wide_len >= capacity) {
        return "";
    }
    // First call sizes the UTF-8 buffer, second call fills it.
    const int utf8_len = WideCharToMultiByte(CP_UTF8, 0, wide_path, (int)wide_len,
                                             nullptr, 0, nullptr, nullptr);
    std::string exe_path(utf8_len, 0);
    WideCharToMultiByte(CP_UTF8, 0, wide_path, (int)wide_len,
                        exe_path.data(), utf8_len, nullptr, nullptr);
    const size_t sep = exe_path.find_last_of("/\\");
    if (sep == std::string::npos) {
        return "";
    }
    return exe_path.substr(0, sep);
#else
    char link_target[4096];
    // readlink does not NUL-terminate; keep one byte free for the terminator.
    const ssize_t len = readlink("/proc/self/exe", link_target, sizeof(link_target) - 1);
    if (len <= 0) {
        return "";
    }
    link_target[len] = 0;
    const std::string exe_path(link_target);
    const size_t sep = exe_path.find_last_of('/');
    if (sep == std::string::npos) {
        return "";
    }
    return exe_path.substr(0, sep);
#endif
}
// Reports whether `p` names an existing filesystem entry that is not a
// directory. An empty path is never considered to exist.
//
// `p` is treated as UTF-8. On Windows it is converted to UTF-16 and
// checked with GetFileAttributesW: the narrow stat() would interpret the
// bytes in the ANSI code page and miss paths containing non-ASCII
// characters (get_exe_dir() returns UTF-8), and S_ISDIR is not provided
// by every Windows CRT. On POSIX the check is a plain stat(), which
// follows symlinks.
bool file_exists(const std::string& p) {
    if (p.empty()) return false;
#ifdef _WIN32
    // cbMultiByte = -1: the returned length includes the terminating NUL.
    const int wide_len = MultiByteToWideChar(CP_UTF8, 0, p.c_str(), -1, nullptr, 0);
    if (wide_len <= 0) return false;
    std::wstring wide(static_cast<size_t>(wide_len), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, p.c_str(), -1, wide.data(), wide_len) <= 0) {
        return false;
    }
    const DWORD attrs = GetFileAttributesW(wide.c_str());
    return attrs != INVALID_FILE_ATTRIBUTES &&
           (attrs & FILE_ATTRIBUTE_DIRECTORY) == 0;
#else
    struct stat st{};
    return stat(p.c_str(), &st) == 0 && !S_ISDIR(st.st_mode);
#endif
}
// Resolves which Python interpreter to use by walking a fallback chain
// and returning the first candidate that applies:
//   1. <exe_dir>/runtime/python/{python.exe|bin/python3}  -> kind=embedded
//   2. $FN_PYTHON (must point at an existing file)        -> kind=env
//   3. <registry_root>/python/.venv/bin/python3           -> kind=registry_venv
//   4. python.exe / python3 looked up by name             -> kind=system
// Only step 3 on Windows sets needs_wsl. This function does no logging
// and no caching; cached_python_runtime() wraps it for that.
PyRuntime resolve_python_runtime() {
    PyRuntime r;

    // 1. Runtime bundled next to the executable.
    const std::string exe = get_exe_dir();
    if (!exe.empty()) {
#ifdef _WIN32
        const std::string p = exe + "\\runtime\\python\\python.exe";
#else
        const std::string p = exe + "/runtime/python/bin/python3";
#endif
        if (file_exists(p)) { r.path = p; r.kind = "embedded"; return r; }
    }

    // 2. Explicit override through the environment; ignored when unset,
    //    empty, or not pointing at an existing file.
    if (const char* env = std::getenv("FN_PYTHON"); env && *env) {
        if (file_exists(env)) { r.path = env; r.kind = "env"; return r; }
    }

    // 3. Legacy: the registry's venv. g_state starts out as nullptr, so
    //    guard the dereference in case this runs before it is assigned.
    if (g_state && !g_state->registry_root.empty()) {
        std::string p = g_state->registry_root + "/python/.venv/bin/python3";
#ifdef _WIN32
        // The venv lives on the Linux filesystem, so it cannot be
        // stat'ed from Windows; it is assumed to exist whenever
        // registry_root is set. needs_wsl=true tells run_subprocess to
        // keep using the wsl.exe launch path.
        // NOTE(review): p is built from registry_root verbatim and
        // run_subprocess passes rt.path to wsl.exe without conversion,
        // while it converts registry_root itself with to_wsl_path().
        // Confirm registry_root is already WSL-form here.
        r.path = p;
        r.kind = "registry_venv";
        r.needs_wsl = true;
        return r;
#else
        if (file_exists(p)) { r.path = p; r.kind = "registry_venv"; return r; }
#endif
    }

    // 4. Last resort: a bare command name, left for the launcher to
    //    resolve (the POSIX path uses execvp for kind == "system").
#ifdef _WIN32
    r.path = "python.exe";
#else
    r.path = "python3";
#endif
    r.kind = "system";
    return r;
}
// Returns the Python runtime for this process, resolving it on the
// first call and reusing the result afterwards. The first call also
// prints one line to stdout with the chosen kind, path and wsl flag so
// the user can see which runtime was picked.
//
// The cache is a function-local static initialised by a lambda, so the
// language guarantees the resolve-and-log step runs once even if the
// first calls arrive concurrently from several threads (the previous
// hand-rolled `inited` flag was unsynchronised).
const PyRuntime& cached_python_runtime() {
    static const PyRuntime runtime = [] {
        PyRuntime resolved = resolve_python_runtime();
        std::fprintf(stdout,
                     "[jobs] python runtime: kind=%s path=%s wsl=%d\n",
                     resolved.kind.c_str(), resolved.path.c_str(),
                     resolved.needs_wsl ? 1 : 0);
        return resolved;
    }();
    return runtime;
}
long long now_ms() {
using namespace std::chrono;
return duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
@@ -436,8 +547,9 @@ ProcResult run_subprocess(const std::string& job_id,
// Construir cmdline segun lang (issue 0033).
// - "go": ejecutar el .exe nativo directamente, sin wsl.exe.
// - "python": wsl.exe --cd <root> -- python3 <run.py> (legacy)
// - "bash": wsl.exe --cd <root> -- bash <run.sh>
// - "python": embedded (Windows nativo) si existe runtime/, si
// no fallback a wsl.exe + venv del registry.
// - "bash": wsl.exe --cd <root> -- bash <run.sh> (siempre)
std::wstring cmdline;
if (lang == "go") {
// run_path es el .exe Windows nativo. CreateProcessW lo lanza
@@ -445,21 +557,35 @@ ProcResult run_subprocess(const std::string& job_id,
cmdline = L"\"";
cmdline += utf8_to_wide(run_path);
cmdline += L"\"";
} else {
} else if (lang == "bash") {
std::string run_wsl = to_wsl_path(run_path);
std::string root_wsl = to_wsl_path(g_state->registry_root);
std::string interp;
if (lang == "bash") {
interp = "/bin/bash";
} else {
interp = root_wsl + "/python/.venv/bin/python3";
}
cmdline = L"wsl.exe --cd ";
cmdline += utf8_to_wide(root_wsl);
cmdline += L" -- ";
cmdline += utf8_to_wide(interp);
cmdline += L" ";
cmdline += L" -- /bin/bash ";
cmdline += utf8_to_wide(run_wsl);
} else {
// python — fase B: usar embedded si esta disponible.
const PyRuntime& rt = cached_python_runtime();
if (rt.needs_wsl) {
// Legacy: registry venv vive en WSL.
std::string run_wsl = to_wsl_path(run_path);
std::string root_wsl = to_wsl_path(g_state->registry_root);
cmdline = L"wsl.exe --cd ";
cmdline += utf8_to_wide(root_wsl);
cmdline += L" -- ";
cmdline += utf8_to_wide(rt.path);
cmdline += L" ";
cmdline += utf8_to_wide(run_wsl);
} else {
// Embedded / FN_PYTHON / system — Python nativo Windows.
// run_path es Windows nativo, no necesita conversion.
cmdline = L"\"";
cmdline += utf8_to_wide(rt.path);
cmdline += L"\" \"";
cmdline += utf8_to_wide(run_path);
cmdline += L"\"";
}
}
std::vector<wchar_t> cmdbuf(cmdline.begin(), cmdline.end());
@@ -648,11 +774,18 @@ ProcResult run_subprocess(const std::string& job_id,
std::fprintf(stderr, "execv bash failed\n");
_exit(127);
}
// Default: python.
std::string py = g_state->registry_root + "/python/.venv/bin/python3";
const char* argv[] = { py.c_str(), run_path.c_str(), nullptr };
execv(py.c_str(), (char* const*)argv);
std::fprintf(stderr, "execv failed: %s\n", py.c_str());
// Default: python — usa la cadena de fallback de fase B
// (embedded > FN_PYTHON > registry venv > system PATH).
const PyRuntime& rt = cached_python_runtime();
if (rt.kind == "system") {
// Lookup en PATH via execvp.
const char* argv[] = { rt.path.c_str(), run_path.c_str(), nullptr };
execvp(rt.path.c_str(), (char* const*)argv);
} else {
const char* argv[] = { rt.path.c_str(), run_path.c_str(), nullptr };
execv(rt.path.c_str(), (char* const*)argv);
}
std::fprintf(stderr, "execv failed: %s\n", rt.path.c_str());
_exit(127);
}
@@ -980,6 +1113,11 @@ bool jobs_init(const char* app_db_path,
}
}
// Forzar resolucion del Python runtime al iniciar — asi el log
// sale en stdout una sola vez con la procedencia (embedded /
// env / registry_venv / system) y el usuario ve que se elegira.
(void)cached_python_runtime();
for (int i = 0; i < n_workers; ++i) {
g_state->workers.emplace_back(worker_loop);
}