#include "enrichers.h"

// NOTE(review): the angle-bracket header names were missing from this file
// (only bare `#include` tokens survived). The list below is reconstructed
// from the symbols the file uses — confirm against version control.
#include <dirent.h>
#include <sys/stat.h>

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <fstream>
#include <string>
#include <vector>

namespace ge {

namespace {

// Process-wide registry of loaded enricher specs. It is cleared and refilled
// by enrichers_load() and read by the lookup functions further down.
std::vector<EnricherSpec> g_enrichers;

// Returns a copy of `s` without leading/trailing whitespace (std::isspace).
std::string strip(const std::string& s) {
    size_t a = 0, b = s.size();
    // The unsigned char cast keeps std::isspace defined for bytes >= 0x80.
    while (a < b && std::isspace(static_cast<unsigned char>(s[a]))) ++a;
    while (b > a && std::isspace(static_cast<unsigned char>(s[b - 1]))) --b;
    return s.substr(a, b - a);
}

// Removes one pair of matching surrounding quotes ("..." or '...').
// Anything else (unquoted, mismatched, or shorter than 2 chars) is returned
// unchanged.
std::string strip_quotes(const std::string& s) {
    if (s.size() >= 2) {
        if ((s.front() == '"' && s.back() == '"') ||
            (s.front() == '\'' && s.back() == '\'')) {
            return s.substr(1, s.size() - 2);
        }
    }
    return s;
}

// Returns `s` lower-cased byte by byte with std::tolower.
std::string lower(std::string s) {
    for (auto& c : s) {
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    }
    return s;
}

// Parses an inline list such as `[a, b, c]` or `[Webpage, Url]`.
// Tolerates whitespace and single/double quotes around items; empty items
// are dropped. Multi-line lists are NOT supported — the manifest always
// writes them inline. Returns an empty vector when `v` is not wrapped in
// `[` ... `]`.
std::vector<std::string> parse_inline_list(const std::string& v) {
    std::vector<std::string> out;
    std::string s = strip(v);
    if (s.size() < 2 || s.front() != '[' || s.back() != ']') return out;
    s = s.substr(1, s.size() - 2);

    std::string token;
    auto flush = [&]() {
        std::string t = strip_quotes(strip(token));
        if (!t.empty()) out.push_back(std::move(t));
        token.clear();
    };

    char quote = 0;              // active quote char, 0 when outside quotes
    bool at_token_start = true;  // only whitespace seen since the last comma
    for (char c : s) {
        if (quote) {
            // Inside a quoted item a comma is data, not a separator.
            token.push_back(c);
            if (c == quote) quote = 0;
            continue;
        }
        if (c == ',') {
            flush();
            at_token_start = true;
            continue;
        }
        // A quote only opens a quoted item when it is the first
        // non-whitespace character, so apostrophes inside a bare word
        // (e.g. O'Brien) do not swallow the following commas.
        if (at_token_start && (c == '"' || c == '\'')) quote = c;
        if (!std::isspace(static_cast<unsigned char>(c))) at_token_start = false;
        token.push_back(c);
    }
    flush();
    return out;
}

// Splits on commas at nesting level zero, honoring quotes and [] / {} nesting.
// Inline YAML such as `{ name: limit, type: int, default: 10 }` may contain
// quoted strings with commas — a naive split would break them.
std::vector split_top_level(const std::string& s) { std::vector out; std::string cur; int depth_b = 0, depth_c = 0; char quote = 0; for (char c : s) { if (quote) { cur.push_back(c); if (c == quote) quote = 0; continue; } if (c == '"' || c == '\'') { quote = c; cur.push_back(c); continue; } if (c == '[') ++depth_b; if (c == ']') --depth_b; if (c == '{') ++depth_c; if (c == '}') --depth_c; if (c == ',' && depth_b == 0 && depth_c == 0) { out.push_back(cur); cur.clear(); continue; } cur.push_back(c); } if (!cur.empty()) out.push_back(cur); return out; } // Parsea un objeto YAML inline `{ name: x, type: int, default: 10 }` a un // EnricherParam. Retorna true si al menos `name` se resolvio. bool parse_inline_param(const std::string& v, EnricherParam* out) { std::string s = strip(v); if (s.size() < 2 || s.front() != '{' || s.back() != '}') return false; s = s.substr(1, s.size() - 2); for (auto& kv : split_top_level(s)) { size_t colon = kv.find(':'); if (colon == std::string::npos) continue; std::string k = strip(kv.substr(0, colon)); std::string val = strip_quotes(strip(kv.substr(colon + 1))); if (k == "name") out->name = val; else if (k == "type") out->type = lower(val); else if (k == "default") out->default_value = val; else if (k == "description") out->description = val; else if (k == "desc") out->description = val; } if (out->type.empty()) out->type = "string"; return !out->name.empty(); } // Manifest YAML soportado (subset): // id: fetch_webpage // name: "Fetch web page" // description: "..." // applies_to: [Webpage, Url] // lang: python <- issue 0033: go|python|bash (default python) // exec: run <- basename del binario/script (default "run") // params: // - { name: timeout_s, type: int, default: 15 } // - { name: region, type: string, default: "" } // // Solo el bloque `params:` se parsea con detalle. Otros bloques con valor // vacio seguido de lineas indentadas (`emits:`, `relations:`, // `uses_functions:`) se ignoran como antes. 
bool parse_manifest(const std::string& path, EnricherSpec* out) { std::ifstream f(path); if (!f) return false; std::string line; bool in_skip_block = false; bool in_params_block = false; while (std::getline(f, line)) { // Strip CR de Windows. if (!line.empty() && line.back() == '\r') line.pop_back(); // Linea blanca o comentario. std::string trim = strip(line); if (trim.empty() || trim.front() == '#') continue; // Si la linea NO empieza con whitespace, salimos de los bloques // anidados — el siguiente top-level reinicia el contexto. bool indented = !line.empty() && std::isspace((unsigned char)line.front()); if (!indented) { in_skip_block = false; in_params_block = false; } if (in_skip_block) continue; if (in_params_block) { // Linea esperada: ` - { name: x, type: int, default: 10 }`. // Tolera variaciones de indent y comilla. std::string body = trim; if (!body.empty() && body.front() == '-') { body = strip(body.substr(1)); } EnricherParam p; if (parse_inline_param(body, &p)) { out->params.push_back(std::move(p)); } continue; } size_t colon = trim.find(':'); if (colon == std::string::npos) continue; std::string key = strip(trim.substr(0, colon)); std::string val = strip(trim.substr(colon + 1)); if (key == "id") out->id = strip_quotes(val); else if (key == "name") out->name = strip_quotes(val); else if (key == "description") out->description = strip_quotes(val); else if (key == "applies_to") out->applies_to = parse_inline_list(val); else if (key == "lang") out->lang = lower(strip_quotes(val)); else if (key == "exec") out->exec_basename = strip_quotes(val); else if (key == "params") { // `params: []` — vacio explicito, nada que hacer. // `params:` — siguiente bloque indentado son items. std::string vs = strip(val); if (vs.empty()) in_params_block = true; // Si fuese inline (`params: [{...}]`) — formato no usado en // los manifests actuales, lo ignoramos. 
} else if (key == "emits" && val.empty()) in_skip_block = true; else if (key == "relations" && val.empty()) in_skip_block = true; else if (key == "uses_functions" && val.empty()) in_skip_block = true; } // Defaults — preservan retrocompat con manifests existentes que no // declaran lang/exec. if (out->lang.empty()) out->lang = "python"; if (out->exec_basename.empty()) out->exec_basename = "run"; // Validar lang reconocido. Manifests con lang invalido se cargan // pero quedan disabled — asi la UI puede informar y el usuario // arregla el manifest. if (out->lang != "python" && out->lang != "go" && out->lang != "bash") { out->disabled = true; out->disabled_reason = "lang invalido: '" + out->lang + "'"; } return !out->id.empty(); } // Resuelve el path al ejecutable/script segun lang + plataforma. // Devuelve "" si no encuentra el archivo y rellena `reason`. std::string resolve_run_path(const std::string& dir, const EnricherSpec& spec, std::string* reason) { #ifdef _WIN32 const char sep = '\\'; const char* go_ext = ".exe"; #else const char sep = '/'; const char* go_ext = ""; #endif auto exists = [](const std::string& p) { struct stat st{}; return stat(p.c_str(), &st) == 0 && !S_ISDIR(st.st_mode); }; std::string base = dir + sep + spec.exec_basename; if (spec.lang == "python") { std::string p = base + ".py"; if (exists(p)) return p; if (reason) *reason = "no existe " + p; return ""; } if (spec.lang == "bash") { std::string p = base + ".sh"; if (exists(p)) return p; if (reason) *reason = "no existe " + p; return ""; } if (spec.lang == "go") { // En Windows: .exe. En Linux: (sin extension). 
std::string p = base + go_ext; if (exists(p)) return p; if (reason) { *reason = "binario Go no compilado: " + p + " (corre el build script del enricher)"; } return ""; } if (reason) *reason = "lang no soportado"; return ""; } } // namespace int enrichers_load(const char* enrichers_dir) { g_enrichers.clear(); if (!enrichers_dir || !*enrichers_dir) return -1; // En Windows los UNC paths esperan backslashes consistentes; mixed // separators (`\\wsl$\\foo/bar`) confunden a opendir de MinGW. std::string dir = enrichers_dir; #ifdef _WIN32 for (char& c : dir) if (c == '/') c = '\\'; #endif DIR* d = opendir(dir.c_str()); if (!d) { std::fprintf(stderr, "[enrichers] opendir failed: %s\n", dir.c_str()); return -1; } struct dirent* ent; while ((ent = readdir(d)) != nullptr) { if (ent->d_name[0] == '.') continue; #ifdef _WIN32 const char sep = '\\'; #else const char sep = '/'; #endif std::string sub = dir + sep + ent->d_name; struct stat st{}; if (stat(sub.c_str(), &st) != 0 || !S_ISDIR(st.st_mode)) continue; std::string manifest = sub + sep + "manifest.yaml"; if (stat(manifest.c_str(), &st) != 0) continue; EnricherSpec spec; if (!parse_manifest(manifest, &spec)) { std::fprintf(stderr, "[enrichers] parse failed: %s\n", manifest.c_str()); continue; } // Resolver el ejecutable segun lang. Si falla (binario Go no // compilado, script ausente, etc.) registramos el spec como // disabled — sigue apareciendo en `enrichers_all()` para que // la UI pueda mostrar warning, pero `enrichers_for_type` lo // oculta del menu de ejecucion. 
std::string reason; std::string run_path = resolve_run_path(sub, spec, &reason); if (run_path.empty()) { spec.disabled = true; if (spec.disabled_reason.empty()) spec.disabled_reason = reason; std::fprintf(stderr, "[enrichers] %s deshabilitado: %s\n", spec.id.c_str(), spec.disabled_reason.c_str()); } spec.run_path = run_path; g_enrichers.push_back(std::move(spec)); } closedir(d); std::sort(g_enrichers.begin(), g_enrichers.end(), [](const EnricherSpec& a, const EnricherSpec& b) { return a.name < b.name; }); return (int)g_enrichers.size(); } const std::vector& enrichers_all() { return g_enrichers; } std::vector enrichers_for_type(const char* type_ref) { std::vector out; if (!type_ref || !*type_ref) return out; std::string want = lower(type_ref); for (const auto& e : g_enrichers) { if (e.disabled) continue; // no ofrecer enrichers no resueltos if (e.applies_to.empty()) { out.push_back(e); continue; } for (const auto& t : e.applies_to) { if (lower(t) == want) { out.push_back(e); break; } } } return out; } const EnricherSpec* enricher_by_id(const char* id) { if (!id || !*id) return nullptr; for (const auto& e : g_enrichers) { if (e.id == id) return &e; } return nullptr; } } // namespace ge