6df04652d8
Infra para correr enrichers en background mientras la app sigue interactiva.
C++:
- jobs.{h,cpp}: tabla jobs en graph_explorer.db, JobRunner con N=2 std::thread
workers, fork+exec POSIX con pipes, parser de PROGRESS:<float> <stage> en
stderr, captura de stdout JSON, persistencia + dirty_counter.
- enrichers.{h,cpp}: scanner de enrichers/<id>/manifest.yaml, parser YAML
minimo (id/name/description/applies_to), filtro por tipo de nodo.
- views_jobs.cpp: panel "Jobs" dockeable con tabla (status/enricher/target/
progress/time), filtro all/active/done/errors, cancelar/borrar inline.
Wiring:
- main.cpp: resolve_registry_root() (FN_REGISTRY_ROOT env o subir desde cwd
buscando registry.db), jobs_init/enrichers_load antes de fn::run_app,
jobs_shutdown al cerrar, dirty_counter -> want_reload, jobs_set_ops_db al
cambiar de proyecto.
- main.cpp:render_context_menu: menu "Run enricher" sustituye placeholder
con submenu filtrado por type_ref via enrichers_for_type. Submit abre
panel Jobs auto.
- views.h: AppState::panel_jobs flag + decl views_jobs().
- CMakeLists.txt: anade jobs.cpp + enrichers.cpp + views_jobs.cpp y enlaza
Threads::Threads.
Wire protocol enricher (subprocess Python):
- stdin: JSON con node_id, metadata, ops_db_path, app_dir, cache_dir,
registry_root, params.
- stderr: PROGRESS:<float> <stage> + LOG lineas libres.
- stdout: JSON resumen al final.
- exit 0 = ok, !=0 = error con stderr capturado en panel Jobs.
El run.py escribe directamente al operations.db (sqlite3 stdlib) — C++ solo
orquesta, no parsea entities/relations.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
173 lines
5.1 KiB
C++
173 lines
5.1 KiB
C++
#include "enrichers.h"
|
|
|
|
#include <algorithm>
|
|
#include <cctype>
|
|
#include <cstdio>
|
|
#include <cstring>
|
|
#include <dirent.h>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <sys/stat.h>
|
|
|
|
namespace ge {
|
|
|
|
namespace {
|
|
|
|
// Enrichers found by the most recent enrichers_load() call. File-local: the
// rest of the program reads it through enrichers_all(), enrichers_for_type()
// and enricher_by_id().
std::vector<EnricherSpec> g_enrichers;
|
|
|
|
// Returns a copy of `s` without its leading and trailing whitespace, where
// "whitespace" is whatever std::isspace reports. Empty or all-whitespace
// input yields an empty string.
std::string strip(const std::string& s) {
    const auto is_ws = [](char ch) {
        return std::isspace(static_cast<unsigned char>(ch)) != 0;
    };

    std::size_t first = 0;
    std::size_t last = s.size();
    for (; first < last && is_ws(s[first]); ++first) {
    }
    for (; last > first && is_ws(s[last - 1]); --last) {
    }
    return s.substr(first, last - first);
}
|
|
|
|
// Removes one pair of matching surrounding quotes ("..." or '...') from `s`.
// Everything else — unquoted text, mismatched quote characters, or a string
// shorter than two characters — is returned unchanged.
std::string strip_quotes(const std::string& s) {
    if (s.size() < 2) return s;

    const char head = s.front();
    const char tail = s.back();
    const bool head_is_quote = (head == '"' || head == '\'');
    if (!head_is_quote || head != tail) return s;

    return s.substr(1, s.size() - 2);
}
|
|
|
|
// Returns `s` with every character mapped through std::tolower. The argument
// is taken by value, so the caller's string is left untouched.
std::string lower(std::string s) {
    std::transform(s.begin(), s.end(), s.begin(), [](unsigned char ch) {
        return static_cast<char>(std::tolower(ch));
    });
    return s;
}
|
|
|
|
// Parsea una lista inline `[a, b, c]` o "[Webpage, Url]". Tolerante a
|
|
// espacios y a comillas simples/dobles dentro. NO soporta listas
|
|
// multi-linea — el manifest las usa siempre inline.
|
|
std::vector<std::string> parse_inline_list(const std::string& v) {
|
|
std::vector<std::string> out;
|
|
std::string s = strip(v);
|
|
if (s.size() < 2 || s.front() != '[' || s.back() != ']') return out;
|
|
s = s.substr(1, s.size() - 2);
|
|
std::string token;
|
|
auto flush = [&]() {
|
|
std::string t = strip_quotes(strip(token));
|
|
if (!t.empty()) out.push_back(std::move(t));
|
|
token.clear();
|
|
};
|
|
for (char c : s) {
|
|
if (c == ',') flush();
|
|
else token.push_back(c);
|
|
}
|
|
flush();
|
|
return out;
|
|
}
|
|
|
|
// Manifest YAML soportado (subset):
|
|
// id: fetch_webpage
|
|
// name: "Fetch web page"
|
|
// description: "..."
|
|
// applies_to: [Webpage, Url]
|
|
// params: <- v1 ignora bloque
|
|
// - { name: timeout_s, ... }
|
|
//
|
|
// Las claves anidadas bajo `params:` se ignoran (saltamos lineas indentadas).
|
|
bool parse_manifest(const std::string& path, EnricherSpec* out) {
|
|
std::ifstream f(path);
|
|
if (!f) return false;
|
|
std::string line;
|
|
bool in_skip_block = false;
|
|
while (std::getline(f, line)) {
|
|
// Strip CR de Windows.
|
|
if (!line.empty() && line.back() == '\r') line.pop_back();
|
|
|
|
// Linea blanca o comentario.
|
|
std::string trim = strip(line);
|
|
if (trim.empty() || trim.front() == '#') continue;
|
|
|
|
// Si la linea NO empieza con whitespace, salimos del bloque skip.
|
|
bool indented = !line.empty() && std::isspace((unsigned char)line.front());
|
|
if (!indented) in_skip_block = false;
|
|
if (in_skip_block) continue;
|
|
|
|
size_t colon = trim.find(':');
|
|
if (colon == std::string::npos) continue;
|
|
|
|
std::string key = strip(trim.substr(0, colon));
|
|
std::string val = strip(trim.substr(colon + 1));
|
|
|
|
if (key == "id") out->id = strip_quotes(val);
|
|
else if (key == "name") out->name = strip_quotes(val);
|
|
else if (key == "description") out->description = strip_quotes(val);
|
|
else if (key == "applies_to") out->applies_to = parse_inline_list(val);
|
|
else if (key == "params" && val.empty()) in_skip_block = true;
|
|
// emits/relations los ignoramos en v1 (solo informativos).
|
|
}
|
|
return !out->id.empty();
|
|
}
|
|
|
|
} // namespace
|
|
|
|
int enrichers_load(const char* enrichers_dir) {
|
|
g_enrichers.clear();
|
|
if (!enrichers_dir || !*enrichers_dir) return -1;
|
|
|
|
DIR* d = opendir(enrichers_dir);
|
|
if (!d) return -1;
|
|
|
|
struct dirent* ent;
|
|
while ((ent = readdir(d)) != nullptr) {
|
|
if (ent->d_name[0] == '.') continue;
|
|
|
|
std::string sub = std::string(enrichers_dir) + "/" + ent->d_name;
|
|
struct stat st{};
|
|
if (stat(sub.c_str(), &st) != 0 || !S_ISDIR(st.st_mode)) continue;
|
|
|
|
std::string manifest = sub + "/manifest.yaml";
|
|
std::string runpy = sub + "/run.py";
|
|
if (stat(manifest.c_str(), &st) != 0) continue;
|
|
if (stat(runpy.c_str(), &st) != 0) continue;
|
|
|
|
EnricherSpec spec;
|
|
if (!parse_manifest(manifest, &spec)) {
|
|
std::fprintf(stderr, "[enrichers] parse failed: %s\n", manifest.c_str());
|
|
continue;
|
|
}
|
|
spec.run_path = runpy;
|
|
g_enrichers.push_back(std::move(spec));
|
|
}
|
|
closedir(d);
|
|
|
|
std::sort(g_enrichers.begin(), g_enrichers.end(),
|
|
[](const EnricherSpec& a, const EnricherSpec& b) {
|
|
return a.name < b.name;
|
|
});
|
|
return (int)g_enrichers.size();
|
|
}
|
|
|
|
const std::vector<EnricherSpec>& enrichers_all() {
|
|
return g_enrichers;
|
|
}
|
|
|
|
std::vector<EnricherSpec> enrichers_for_type(const char* type_ref) {
|
|
std::vector<EnricherSpec> out;
|
|
if (!type_ref || !*type_ref) return out;
|
|
std::string want = lower(type_ref);
|
|
for (const auto& e : g_enrichers) {
|
|
if (e.applies_to.empty()) {
|
|
out.push_back(e);
|
|
continue;
|
|
}
|
|
for (const auto& t : e.applies_to) {
|
|
if (lower(t) == want) { out.push_back(e); break; }
|
|
}
|
|
}
|
|
return out;
|
|
}
|
|
|
|
// Looks up a registered enricher by exact (case-sensitive) id.
//
// Returns a pointer to the first matching registry entry, or nullptr when
// `id` is null/empty or no entry matches. The pointer refers to storage owned
// by the registry, which enrichers_load() clears and rebuilds, so it must not
// be kept across a reload.
const EnricherSpec* enricher_by_id(const char* id) {
    if (id == nullptr || *id == '\0') return nullptr;

    const auto hit = std::find_if(
        g_enrichers.begin(), g_enrichers.end(),
        [id](const EnricherSpec& spec) { return spec.id == id; });
    return hit != g_enrichers.end() ? &*hit : nullptr;
}
|
|
|
|
} // namespace ge
|