feat(0035e): manifest auto_group_threshold override + propagacion a Python

El manifest YAML puede declarar 'auto_group_threshold: <int>' en el
top-level. enrichers.cpp lo parsea y lo guarda en EnricherSpec.
jobs.cpp lo inyecta como campo opcional 'auto_group_threshold' en el
JSON stdin del subprocess. Los enrichers Python que crean Groups
(web_search, split_words, split_sentences, extract_iocs_text) leen el
campo y, si viene > 0, lo usan en lugar de su DEFAULT_GROUP_THRESHOLD.
El helper _coerce_threshold tolera int / str / None / 0: todo valor que no resulte en un entero > 0 cae al default.
This commit is contained in:
2026-05-04 14:20:52 +02:00
parent 65a14749f3
commit 52495af779
7 changed files with 92 additions and 10 deletions
+12 -4
View File
@@ -391,7 +391,8 @@ std::string build_stdin_json(const std::string& job_id,
const std::string& ops_db,
const std::string& app_dir,
const std::string& registry_root,
const std::string& lang)
const std::string& lang,
int auto_group_threshold = 0)
{
std::string node_type, node_name, node_metadata = "{}";
if (!node_id.empty()) {
@@ -457,8 +458,14 @@ std::string build_stdin_json(const std::string& job_id,
<< "\"ops_db_path\":\"" << json_escape(ops_db_out) << "\","
<< "\"app_dir\":\"" << json_escape(app_dir_out) << "\","
<< "\"cache_dir\":\"" << json_escape(cache_dir) << "\","
<< "\"registry_root\":\"" << json_escape(root_out) << "\""
<< '}';
<< "\"registry_root\":\"" << json_escape(root_out) << "\"";
// Issue 0035e: solo emitimos el campo si el manifest declara override.
// Asi las pruebas que NO setean el campo siguen viendo defaults estables
// y los enrichers Python solo lo leen cuando viene declarado.
if (auto_group_threshold > 0) {
o << ",\"auto_group_threshold\":" << auto_group_threshold;
}
o << '}';
return o.str();
}
@@ -1050,7 +1057,8 @@ void worker_loop() {
}
std::string stdin_payload = build_stdin_json(
ctx.id, ctx.enricher_id, ctx.node_id, ctx.params_json,
ops_db, g_state->app_dir, g_state->registry_root, lang);
ops_db, g_state->app_dir, g_state->registry_root, lang,
spec->auto_group_threshold);
ProcResult res = run_subprocess(job_id, run_path, lang,
stdin_payload, ctrl);