#include "enrichers.h"

// NOTE(review): the system header names below were inferred from what this
// file uses (the extracted text had lost them) — confirm against the original
// include list. <memory> and <utility> are needed by the code as written here.
#include <dirent.h>
#include <sys/stat.h>

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <fstream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace ge {

namespace {

// Registry filled by enrichers_load() and read by the lookup functions below.
// NOTE(review): element type inferred from usage — confirm against enrichers.h.
std::vector<EnricherSpec> g_enrichers;

// Deleter so a DIR* can be owned by std::unique_ptr and is closed on every
// exit path, including exceptions thrown while scanning.
struct DirCloser {
    void operator()(DIR* d) const noexcept {
        if (d) closedir(d);
    }
};

// Returns `s` without leading and trailing whitespace (as per std::isspace).
std::string strip(const std::string& s) {
    size_t a = 0, b = s.size();
    while (a < b && std::isspace(static_cast<unsigned char>(s[a]))) ++a;
    while (b > a && std::isspace(static_cast<unsigned char>(s[b - 1]))) --b;
    return s.substr(a, b - a);
}

// Removes one pair of matching surrounding quotes ("..." or '...').
// Anything else is returned unchanged.
std::string strip_quotes(const std::string& s) {
    if (s.size() >= 2) {
        const char first = s.front();
        const char last = s.back();
        if ((first == '"' && last == '"') || (first == '\'' && last == '\'')) {
            return s.substr(1, s.size() - 2);
        }
    }
    return s;
}

// Returns a copy of `s` with every char passed through std::tolower.
std::string lower(std::string s) {
    for (auto& c : s) {
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    }
    return s;
}

// Parses an inline list such as `[a, b, c]` or `[Webpage, Url]`. Tolerates
// whitespace and single/double quotes around each item. Multi-line lists are
// NOT supported; anything not wrapped in `[...]` yields an empty vector.
// Items are split on every ',' (commas inside quotes are not special-cased),
// and empty items are dropped.
std::vector<std::string> parse_inline_list(const std::string& v) {
    std::vector<std::string> out;
    std::string s = strip(v);
    if (s.size() < 2 || s.front() != '[' || s.back() != ']') return out;
    s = s.substr(1, s.size() - 2);

    std::string token;
    auto flush = [&]() {
        std::string t = strip_quotes(strip(token));
        if (!t.empty()) out.push_back(std::move(t));
        token.clear();
    };
    for (char c : s) {
        if (c == ',') flush();
        else token.push_back(c);
    }
    flush();
    return out;
}

// Supported manifest YAML (subset):
//   id: fetch_webpage
//   name: "Fetch web page"
//   description: "..."
//   applies_to: [Webpage, Url]
//   params:                <- v1 ignores the block
//     - { name: timeout_s, ... }
//
// A `key:` line with an empty value opens a nested block; every following
// indented line is skipped until the next non-indented line. This applies to
// any key (params, emits, relations, ...), so nested entries such as
// `name:` or `description:` inside those blocks cannot overwrite the
// top-level fields of the spec.
//
// Returns false when the file cannot be opened or no non-empty `id` was
// found; true otherwise. Fields of `*out` that the manifest does not mention
// are left untouched.
bool parse_manifest(const std::string& path, EnricherSpec* out) {
    std::ifstream f(path);
    if (!f) return false;

    std::string line;
    bool in_skip_block = false;
    while (std::getline(f, line)) {
        // Strip the CR of Windows line endings.
        if (!line.empty() && line.back() == '\r') line.pop_back();

        // Blank line or comment.
        const std::string trim = strip(line);
        if (trim.empty() || trim.front() == '#') continue;

        // A line that does NOT start with whitespace ends any skipped block.
        const bool indented =
            !line.empty() && std::isspace(static_cast<unsigned char>(line.front()));
        if (!indented) in_skip_block = false;
        if (in_skip_block) continue;

        const size_t colon = trim.find(':');
        if (colon == std::string::npos) continue;

        const std::string key = strip(trim.substr(0, colon));
        const std::string val = strip(trim.substr(colon + 1));

        if (key == "id") out->id = strip_quotes(val);
        else if (key == "name") out->name = strip_quotes(val);
        else if (key == "description") out->description = strip_quotes(val);
        else if (key == "applies_to") out->applies_to = parse_inline_list(val);
        // emits/relations are ignored in v1 (informational only).

        // An empty value means the content lives in the indented lines that
        // follow; skip them instead of parsing them as top-level keys.
        if (val.empty()) in_skip_block = true;
    }
    return !out->id.empty();
}

}  // namespace

// Rebuilds the registry from `enrichers_dir`.
//
// Every sub-directory whose name does not start with '.' and that contains
// both `manifest.yaml` and `run.py` is considered; its manifest is parsed and,
// on success, the spec is registered with `run_path` pointing at `run.py`.
// The registry is cleared first, so it is empty after a failed call.
//
// Returns the number of registered enrichers, or -1 when `enrichers_dir` is
// null/empty or the directory cannot be opened.
int enrichers_load(const char* enrichers_dir) {
    g_enrichers.clear();
    if (!enrichers_dir || !*enrichers_dir) return -1;

    // On Windows, UNC paths expect consistent backslashes; mixed separators
    // (`\\wsl$\\foo/bar`) confuse MinGW's opendir.
    std::string dir = enrichers_dir;
#ifdef _WIN32
    for (char& c : dir) if (c == '/') c = '\\';
    const char sep = '\\';
#else
    const char sep = '/';
#endif

    std::unique_ptr<DIR, DirCloser> d(opendir(dir.c_str()));
    if (!d) {
        std::fprintf(stderr, "[enrichers] opendir failed: %s\n", dir.c_str());
        return -1;
    }

    while (const struct dirent* ent = readdir(d.get())) {
        // Skips ".", ".." and hidden entries.
        if (ent->d_name[0] == '.') continue;

        const std::string sub = dir + sep + ent->d_name;
        struct stat st{};
        if (stat(sub.c_str(), &st) != 0 || !S_ISDIR(st.st_mode)) continue;

        const std::string manifest = sub + sep + "manifest.yaml";
        const std::string runpy = sub + sep + "run.py";
        // Both files only need to exist; their type is not checked.
        if (stat(manifest.c_str(), &st) != 0) continue;
        if (stat(runpy.c_str(), &st) != 0) continue;

        EnricherSpec spec;
        if (!parse_manifest(manifest, &spec)) {
            std::fprintf(stderr, "[enrichers] parse failed: %s\n", manifest.c_str());
            continue;
        }
        spec.run_path = runpy;
        g_enrichers.push_back(std::move(spec));
    }
    d.reset();  // Close the directory before sorting.

    // Sort by display name; ties are broken by id so the result does not
    // depend on the (unspecified) order in which readdir returns entries.
    std::sort(g_enrichers.begin(), g_enrichers.end(),
              [](const EnricherSpec& a, const EnricherSpec& b) {
                  if (a.name != b.name) return a.name < b.name;
                  return a.id < b.id;
              });
    return static_cast<int>(g_enrichers.size());
}

// Returns the registry as filled by the last enrichers_load() call.
// NOTE(review): return type inferred from usage — confirm against enrichers.h.
const std::vector<EnricherSpec>& enrichers_all() { return g_enrichers; }

// Returns copies of the enrichers applicable to `type_ref`: those whose
// `applies_to` is empty (treated as "applies to everything") or contains
// `type_ref`, compared case-insensitively. A null or empty `type_ref` yields
// an empty vector.
// NOTE(review): return type inferred from usage — confirm against enrichers.h.
std::vector<EnricherSpec> enrichers_for_type(const char* type_ref) {
    std::vector<EnricherSpec> out;
    if (!type_ref || !*type_ref) return out;

    const std::string want = lower(type_ref);
    for (const auto& e : g_enrichers) {
        if (e.applies_to.empty()) {
            out.push_back(e);
            continue;
        }
        for (const auto& t : e.applies_to) {
            if (lower(t) == want) {
                out.push_back(e);
                break;
            }
        }
    }
    return out;
}

// Returns a pointer to the registered enricher whose id equals `id` exactly
// (case-sensitive), or nullptr when `id` is null/empty or nothing matches.
// The pointer refers to an element of the registry, which enrichers_load()
// clears and refills.
const EnricherSpec* enricher_by_id(const char* id) {
    if (!id || !*id) return nullptr;
    for (const auto& e : g_enrichers) {
        if (e.id == id) return &e;
    }
    return nullptr;
}

}  // namespace ge