From fdc6b91f4d97e40ad1cd1ca77c22b29f50770ef3 Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Mon, 4 May 2026 14:24:26 +0200 Subject: [PATCH] =?UTF-8?q?feat(0013):=20add=20extract=5Fpanel=20=E2=80=94?= =?UTF-8?q?=20UI=20+=20subprocess=20+=20apply=20(dedupe)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit extract_panel.{h,cpp}: panel ImGui dockeable con textarea grande, boton Extract que lanza enrichers/paste_extract/run.py en un std::thread aparte (no bloquea UI), tablas editables de entidades y relaciones propuestas con checkboxes, y boton Apply Selected que persiste a operations.db con dedupe por (type_ref, name) y por (from, to, name) en relaciones. Parser JSON ad-hoc (suficiente para el contrato del enricher) para no añadir dependencias. Apply usa SQLite directamente (mismo patron que entity_ops/jobs.cpp). Anade panel_extract a AppState. La logica apply esta separada de ImGui para poder testarla en aislamiento desde pytest. Co-Authored-By: Claude Opus 4.7 (1M context) --- extract_panel.cpp | 1079 +++++++++++++++++++++++++++++++++++++++++++++ extract_panel.h | 139 ++++++ views.h | 1 + 3 files changed, 1219 insertions(+) create mode 100644 extract_panel.cpp create mode 100644 extract_panel.h diff --git a/extract_panel.cpp b/extract_panel.cpp new file mode 100644 index 0000000..96b3038 --- /dev/null +++ b/extract_panel.cpp @@ -0,0 +1,1079 @@ +#include "extract_panel.h" +#include "views.h" +#include "entity_ops.h" + +#include "imgui.h" +#include "core/icons_tabler.h" +#include "core/tokens.h" + +#include "../../../../cpp/vendor/sqlite3/sqlite3.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 + #ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN + #endif + #include +#else + #include + #include + #include + #include + #include + #include +#endif + +namespace ge { + +// 
--------------------------------------------------------------------------- +// Estado del modulo +// --------------------------------------------------------------------------- + +namespace { + +struct ModuleState { + std::string enrichers_dir; + std::string app_dir; + std::string registry_root; +}; + +ModuleState g_mod; +ExtractPanelState* g_panel = nullptr; + +ExtractPanelState& panel_state() { + if (!g_panel) { + g_panel = new ExtractPanelState(); + g_panel->text_buf.assign(8192, 0); + g_panel->text_initialized = true; + } + return *g_panel; +} + +bool file_exists(const std::string& p) { + struct stat st{}; + return !p.empty() && stat(p.c_str(), &st) == 0 && !S_ISDIR(st.st_mode); +} + +long long now_ms_local() { + using namespace std::chrono; + return duration_cast(system_clock::now() + .time_since_epoch()).count(); +} + +std::string now_iso_local() { + auto t = std::time(nullptr); + std::tm tm_utc{}; +#ifdef _WIN32 + gmtime_s(&tm_utc, &t); +#else + gmtime_r(&t, &tm_utc); +#endif + char buf[32]; + std::strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &tm_utc); + return buf; +} + +// --------------------------------------------------------------------------- +// JSON parser minimo +// --------------------------------------------------------------------------- + +const char* skip_ws(const char* p, const char* end) { + while (p < end) { + char c = *p; + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { ++p; continue; } + break; + } + return p; +} + +bool parse_string(const char*& p, const char* end, std::string* out) { + if (p >= end || *p != '"') return false; + ++p; + out->clear(); + while (p < end) { + char c = *p++; + if (c == '"') return true; + if (c == '\\') { + if (p >= end) return false; + char esc = *p++; + switch (esc) { + case '"': out->push_back('"'); break; + case '\\': out->push_back('\\'); break; + case '/': out->push_back('/'); break; + case 'b': out->push_back('\b'); break; + case 'f': out->push_back('\f'); break; + case 'n': 
out->push_back('\n'); break; + case 'r': out->push_back('\r'); break; + case 't': out->push_back('\t'); break; + case 'u': { + if (p + 4 > end) return false; + unsigned cp = 0; + for (int i = 0; i < 4; ++i) { + char h = p[i]; cp <<= 4; + if (h >= '0' && h <= '9') cp |= (h - '0'); + else if (h >= 'a' && h <= 'f') cp |= (h - 'a' + 10); + else if (h >= 'A' && h <= 'F') cp |= (h - 'A' + 10); + else return false; + } + p += 4; + if (cp < 0x80) out->push_back((char)cp); + else if (cp < 0x800) { + out->push_back((char)(0xC0 | (cp >> 6))); + out->push_back((char)(0x80 | (cp & 0x3F))); + } else { + out->push_back((char)(0xE0 | (cp >> 12))); + out->push_back((char)(0x80 | ((cp >> 6) & 0x3F))); + out->push_back((char)(0x80 | (cp & 0x3F))); + } + break; + } + default: out->push_back(esc); break; + } + } else { + out->push_back(c); + } + } + return false; +} + +bool skip_value(const char*& p, const char* end, std::string* lit_out = nullptr) { + p = skip_ws(p, end); + if (p >= end) return false; + const char* start = p; + if (*p == '"') { + std::string tmp; + bool ok = parse_string(p, end, &tmp); + if (ok && lit_out) { lit_out->assign("\""); *lit_out += tmp; *lit_out += "\""; } + return ok; + } + if (*p == '{' || *p == '[') { + char open = *p, close = (open == '{' ? 
'}' : ']'); + int depth = 0; + bool in_str = false; + const char* obj_start = p; + while (p < end) { + char c = *p++; + if (in_str) { + if (c == '\\' && p < end) { ++p; continue; } + if (c == '"') in_str = false; + continue; + } + if (c == '"') { in_str = true; continue; } + if (c == open) ++depth; + else if (c == close) { --depth; if (depth == 0) break; } + } + if (depth != 0) return false; + if (lit_out) lit_out->assign(obj_start, p); + return true; + } + while (p < end) { + char c = *p; + if (c == ',' || c == '}' || c == ']' || + c == ' ' || c == '\t' || c == '\n' || c == '\r') break; + ++p; + } + if (lit_out) lit_out->assign(start, p); + return true; +} + +bool expect_char(const char*& p, const char* end, char c) { + p = skip_ws(p, end); + if (p >= end || *p != c) return false; + ++p; + return true; +} + +bool parse_entities_array(const char*& p, const char* end, + std::vector* out) +{ + if (!expect_char(p, end, '[')) return false; + p = skip_ws(p, end); + if (p < end && *p == ']') { ++p; return true; } + while (p < end) { + if (!expect_char(p, end, '{')) return false; + ProposedEntity e; + e.confidence = 1.0; + while (p < end) { + p = skip_ws(p, end); + if (p < end && *p == '}') { ++p; break; } + std::string key; + if (!parse_string(p, end, &key)) return false; + if (!expect_char(p, end, ':')) return false; + p = skip_ws(p, end); + if (key == "metadata") { + std::string lit; + if (!skip_value(p, end, &lit)) return false; + e.metadata_json = std::move(lit); + } else if (p < end && *p == '"') { + std::string val; + if (!parse_string(p, end, &val)) return false; + if (key == "id") e.tmp_id = std::move(val); + else if (key == "type_ref") e.type_ref = std::move(val); + else if (key == "name") e.name = std::move(val); + else if (key == "source") e.source = std::move(val); + } else { + std::string lit; + if (!skip_value(p, end, &lit)) return false; + if (key == "start") e.start_offset = std::atoi(lit.c_str()); + else if (key == "end") e.end_offset = 
std::atoi(lit.c_str()); + else if (key == "confidence") e.confidence = std::atof(lit.c_str()); + } + p = skip_ws(p, end); + if (p < end && *p == ',') { ++p; continue; } + } + std::snprintf(e.type_buf, sizeof(e.type_buf), "%s", e.type_ref.c_str()); + std::snprintf(e.name_buf, sizeof(e.name_buf), "%s", e.name.c_str()); + e.selected = true; + out->push_back(std::move(e)); + p = skip_ws(p, end); + if (p < end && *p == ',') { ++p; continue; } + if (p < end && *p == ']') { ++p; return true; } + } + return false; +} + +bool parse_relations_array(const char*& p, const char* end, + std::vector* out) +{ + if (!expect_char(p, end, '[')) return false; + p = skip_ws(p, end); + if (p < end && *p == ']') { ++p; return true; } + while (p < end) { + if (!expect_char(p, end, '{')) return false; + ProposedRelation r; + while (p < end) { + p = skip_ws(p, end); + if (p < end && *p == '}') { ++p; break; } + std::string key; + if (!parse_string(p, end, &key)) return false; + if (!expect_char(p, end, ':')) return false; + p = skip_ws(p, end); + if (p < end && *p == '"') { + std::string val; + if (!parse_string(p, end, &val)) return false; + if (key == "from_id") r.from_tmp_id = std::move(val); + else if (key == "to_id") r.to_tmp_id = std::move(val); + else if (key == "name") r.name = std::move(val); + else if (key == "source") r.source = std::move(val); + } else { + std::string lit; + if (!skip_value(p, end, &lit)) return false; + if (key == "confidence") r.confidence = std::atof(lit.c_str()); + } + p = skip_ws(p, end); + if (p < end && *p == ',') { ++p; continue; } + } + r.selected = true; + out->push_back(std::move(r)); + p = skip_ws(p, end); + if (p < end && *p == ',') { ++p; continue; } + if (p < end && *p == ']') { ++p; return true; } + } + return false; +} + +bool parse_layers_array(const char*& p, const char* end, + std::vector* out) +{ + if (!expect_char(p, end, '[')) return false; + p = skip_ws(p, end); + if (p < end && *p == ']') { ++p; return true; } + while (p < end) { + 
std::string s; + if (!parse_string(p, end, &s)) return false; + out->push_back(std::move(s)); + p = skip_ws(p, end); + if (p < end && *p == ',') { ++p; continue; } + if (p < end && *p == ']') { ++p; return true; } + } + return false; +} + +bool parse_object_paste_extract(const char*& p, const char* end, + ExtractResult* res) +{ + if (!expect_char(p, end, '{')) return false; + while (p < end) { + p = skip_ws(p, end); + if (p < end && *p == '}') { ++p; return true; } + std::string key; + if (!parse_string(p, end, &key)) return false; + if (!expect_char(p, end, ':')) return false; + p = skip_ws(p, end); + if (key == "entities") { + if (!parse_entities_array(p, end, &res->entities)) return false; + } else if (key == "relations") { + if (!parse_relations_array(p, end, &res->relations)) return false; + } else if (key == "stats") { + if (!expect_char(p, end, '{')) return false; + while (p < end) { + p = skip_ws(p, end); + if (p < end && *p == '}') { ++p; break; } + std::string sk; + if (!parse_string(p, end, &sk)) return false; + if (!expect_char(p, end, ':')) return false; + p = skip_ws(p, end); + if (sk == "layers") { + if (!parse_layers_array(p, end, &res->layers)) return false; + } else { + if (!skip_value(p, end, nullptr)) return false; + } + p = skip_ws(p, end); + if (p < end && *p == ',') { ++p; continue; } + } + } else if (key == "error") { + std::string s; + if (p < end && *p == '"') { + if (!parse_string(p, end, &s)) return false; + res->error = std::move(s); + } else { + if (!skip_value(p, end, nullptr)) return false; + } + } else { + if (!skip_value(p, end, nullptr)) return false; + } + p = skip_ws(p, end); + if (p < end && *p == ',') { ++p; continue; } + } + return false; +} + +// --------------------------------------------------------------------------- +// Subprocess +// --------------------------------------------------------------------------- + +std::string python_path() { + if (const char* env = std::getenv("FN_PYTHON"); env && *env && 
file_exists(env)) { + return env; + } + if (!g_mod.registry_root.empty()) { + std::string p = g_mod.registry_root + "/python/.venv/bin/python3"; +#ifdef _WIN32 + return p; +#else + if (file_exists(p)) return p; +#endif + } +#ifdef _WIN32 + return "python.exe"; +#else + return "python3"; +#endif +} + +std::string build_stdin_payload(const std::string& text, bool use_hybrid) { + auto esc = [](const std::string& s) { + std::string o; o.reserve(s.size() + 8); + for (char c : s) { + switch (c) { + case '"': o += "\\\""; break; + case '\\': o += "\\\\"; break; + case '\n': o += "\\n"; break; + case '\r': o += "\\r"; break; + case '\t': o += "\\t"; break; + default: + if ((unsigned char)c < 0x20) { + char b[8]; + std::snprintf(b, sizeof(b), "\\u%04x", (unsigned char)c); + o += b; + } else { o += c; } + } + } + return o; + }; + std::ostringstream o; + o << "{" + << "\"node_id\":\"\"," + << "\"node_name\":\"\"," + << "\"node_type\":\"\"," + << "\"metadata\":{}," + << "\"ops_db_path\":\"\"," + << "\"app_dir\":\"" << esc(g_mod.app_dir) << "\"," + << "\"cache_dir\":\"\"," + << "\"registry_root\":\"" << esc(g_mod.registry_root) << "\"," + << "\"params\":{" + << "\"text\":\"" << esc(text) << "\"," + << "\"use_hybrid\":" << (use_hybrid ? 
"true" : "false") + << "}" + << "}"; + return o.str(); +} + +#ifdef _WIN32 + +bool spawn_python_blocking(const std::string& script_path, + const std::string& stdin_payload, + std::string* stdout_buf, + std::string* stderr_tail, + int* exit_code) +{ + SECURITY_ATTRIBUTES sa{}; sa.nLength = sizeof(sa); sa.bInheritHandle = TRUE; + HANDLE in_r=nullptr, in_w=nullptr, out_r=nullptr, out_w=nullptr, + err_r=nullptr, err_w=nullptr; + if (!CreatePipe(&in_r,&in_w,&sa,0) || + !CreatePipe(&out_r,&out_w,&sa,0) || + !CreatePipe(&err_r,&err_w,&sa,0)) return false; + SetHandleInformation(in_w, HANDLE_FLAG_INHERIT, 0); + SetHandleInformation(out_r, HANDLE_FLAG_INHERIT, 0); + SetHandleInformation(err_r, HANDLE_FLAG_INHERIT, 0); + + std::string py = python_path(); + std::string cmd = "\"" + py + "\" \"" + script_path + "\""; + int n = MultiByteToWideChar(CP_UTF8, 0, cmd.c_str(), (int)cmd.size(), + nullptr, 0); + std::wstring w(n, 0); + MultiByteToWideChar(CP_UTF8, 0, cmd.c_str(), (int)cmd.size(), w.data(), n); + std::vector wbuf(w.begin(), w.end()); wbuf.push_back(0); + + STARTUPINFOW si{}; si.cb = sizeof(si); si.dwFlags = STARTF_USESTDHANDLES; + si.hStdInput=in_r; si.hStdOutput=out_w; si.hStdError=err_w; + PROCESS_INFORMATION pi{}; + BOOL ok = CreateProcessW(nullptr, wbuf.data(), nullptr, nullptr, TRUE, + CREATE_NO_WINDOW, nullptr, nullptr, &si, &pi); + CloseHandle(in_r); CloseHandle(out_w); CloseHandle(err_w); + if (!ok) { + if (stderr_tail) *stderr_tail = "CreateProcessW failed"; + CloseHandle(in_w); CloseHandle(out_r); CloseHandle(err_r); + return false; + } + DWORD wn = 0; + WriteFile(in_w, stdin_payload.data(), (DWORD)stdin_payload.size(), + &wn, nullptr); + CloseHandle(in_w); + + std::thread err_t([&]() { + char b[1024]; + while (true) { + DWORD m = 0; + if (!ReadFile(err_r, b, sizeof(b), &m, nullptr) || m == 0) break; + if (stderr_tail) { + stderr_tail->append(b, (size_t)m); + if (stderr_tail->size() > 4096) + stderr_tail->erase(0, stderr_tail->size() - 4096); + } + } + }); + 
{ + char b[4096]; + while (true) { + DWORD m = 0; + if (!ReadFile(out_r, b, sizeof(b), &m, nullptr) || m == 0) break; + if (stdout_buf) stdout_buf->append(b, (size_t)m); + if (stdout_buf && stdout_buf->size() > 1024 * 1024) break; + } + } + CloseHandle(out_r); + WaitForSingleObject(pi.hProcess, INFINITE); + DWORD ec = 0; GetExitCodeProcess(pi.hProcess, &ec); + if (exit_code) *exit_code = (int)ec; + err_t.join(); + CloseHandle(err_r); CloseHandle(pi.hProcess); CloseHandle(pi.hThread); + return true; +} + +#else + +bool spawn_python_blocking(const std::string& script_path, + const std::string& stdin_payload, + std::string* stdout_buf, + std::string* stderr_tail, + int* exit_code) +{ + int p_in[2]={-1,-1}, p_out[2]={-1,-1}, p_err[2]={-1,-1}; + if (pipe(p_in) != 0 || pipe(p_out) != 0 || pipe(p_err) != 0) return false; + pid_t pid = fork(); + if (pid < 0) return false; + if (pid == 0) { + dup2(p_in[0], 0); dup2(p_out[1], 1); dup2(p_err[1], 2); + close(p_in[0]); close(p_in[1]); + close(p_out[0]); close(p_out[1]); + close(p_err[0]); close(p_err[1]); + std::string py = python_path(); + const char* argv[] = { py.c_str(), script_path.c_str(), nullptr }; + execv(py.c_str(), (char* const*)argv); + execvp(py.c_str(), (char* const*)argv); + _exit(127); + } + close(p_in[0]); close(p_out[1]); close(p_err[1]); + if (!stdin_payload.empty()) { + size_t left = stdin_payload.size(); + const char* p = stdin_payload.data(); + while (left > 0) { + ssize_t n = write(p_in[1], p, left); + if (n < 0) { if (errno == EINTR) continue; break; } + p += n; left -= (size_t)n; + } + } + close(p_in[1]); + std::thread err_t([&]() { + char b[1024]; + while (true) { + ssize_t n = read(p_err[0], b, sizeof(b)); + if (n <= 0) break; + if (stderr_tail) { + stderr_tail->append(b, (size_t)n); + if (stderr_tail->size() > 4096) + stderr_tail->erase(0, stderr_tail->size() - 4096); + } + } + }); + { + char b[4096]; + while (true) { + ssize_t n = read(p_out[0], b, sizeof(b)); + if (n <= 0) break; + if (stdout_buf) 
stdout_buf->append(b, (size_t)n); + if (stdout_buf && stdout_buf->size() > 1024 * 1024) break; + } + } + close(p_out[0]); + int status = 0; + waitpid(pid, &status, 0); + err_t.join(); + close(p_err[0]); + if (exit_code) { + *exit_code = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + } + return true; +} + +#endif + +// --------------------------------------------------------------------------- +// Apply (lado SQLite) +// --------------------------------------------------------------------------- + +bool find_existing_entity(sqlite3* db, const std::string& type_ref, + const std::string& name, std::string* out_id) +{ + sqlite3_stmt* st = nullptr; + const char* sql = + "SELECT id FROM entities WHERE type_ref = ? AND name = ? LIMIT 1"; + if (sqlite3_prepare_v2(db, sql, -1, &st, nullptr) != SQLITE_OK) return false; + sqlite3_bind_text(st, 1, type_ref.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(st, 2, name.c_str(), -1, SQLITE_TRANSIENT); + bool found = false; + if (sqlite3_step(st) == SQLITE_ROW) { + const unsigned char* t = sqlite3_column_text(st, 0); + if (t) { *out_id = (const char*)t; found = true; } + } + sqlite3_finalize(st); + return found; +} + +bool insert_entity_with_metadata(sqlite3* db, + const std::string& id, + const std::string& name, + const std::string& type_ref, + const std::string& metadata_json, + const std::string& ts) +{ + sqlite3_stmt* st = nullptr; + const char* sql = + "INSERT INTO entities (id, name, type_ref, source, metadata, " + " created_at, updated_at) " + "VALUES (?, ?, ?, 'panel:paste_extract', ?, ?, ?)"; + if (sqlite3_prepare_v2(db, sql, -1, &st, nullptr) != SQLITE_OK) return false; + sqlite3_bind_text(st, 1, id.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(st, 2, name.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(st, 3, type_ref.c_str(), -1, SQLITE_TRANSIENT); + const std::string md = metadata_json.empty() ? 
std::string("{}") + : metadata_json; + sqlite3_bind_text(st, 4, md.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(st, 5, ts.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(st, 6, ts.c_str(), -1, SQLITE_TRANSIENT); + bool ok = sqlite3_step(st) == SQLITE_DONE; + sqlite3_finalize(st); + return ok; +} + +bool relation_exists(sqlite3* db, const std::string& from_id, + const std::string& to_id, const std::string& name) +{ + sqlite3_stmt* st = nullptr; + const char* sql = + "SELECT 1 FROM relations WHERE from_entity = ? AND to_entity = ? " + "AND name = ? LIMIT 1"; + if (sqlite3_prepare_v2(db, sql, -1, &st, nullptr) != SQLITE_OK) return false; + sqlite3_bind_text(st, 1, from_id.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(st, 2, to_id.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(st, 3, name.c_str(), -1, SQLITE_TRANSIENT); + bool found = (sqlite3_step(st) == SQLITE_ROW); + sqlite3_finalize(st); + return found; +} + +bool insert_relation_simple(sqlite3* db, const std::string& id, + const std::string& name, + const std::string& from_id, + const std::string& to_id, + const std::string& ts) +{ + sqlite3_stmt* st = nullptr; + const char* sql = + "INSERT INTO relations (id, name, from_entity, to_entity, " + " created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?)"; + if (sqlite3_prepare_v2(db, sql, -1, &st, nullptr) != SQLITE_OK) return false; + sqlite3_bind_text(st, 1, id.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(st, 2, name.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(st, 3, from_id.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(st, 4, to_id.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(st, 5, ts.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(st, 6, ts.c_str(), -1, SQLITE_TRANSIENT); + bool ok = sqlite3_step(st) == SQLITE_DONE; + sqlite3_finalize(st); + return ok; +} + +} // namespace + +// --------------------------------------------------------------------------- +// API publica +// 
--------------------------------------------------------------------------- + +void extract_panel_init(const char* enrichers_dir, + const char* app_dir, + const char* registry_root) +{ + g_mod.enrichers_dir = enrichers_dir ? enrichers_dir : ""; + g_mod.app_dir = app_dir ? app_dir : ""; + g_mod.registry_root = registry_root ? registry_root : ""; +} + +void extract_panel_shutdown() { + if (!g_panel) return; + if (g_panel->worker.joinable()) g_panel->worker.join(); + delete g_panel; + g_panel = nullptr; +} + +bool extract_panel_parse_result(const std::string& json_text, + ExtractResult* res) +{ + if (!res) return false; + res->entities.clear(); + res->relations.clear(); + res->layers.clear(); + res->error.clear(); + if (json_text.empty()) { + res->error = "empty result"; + return false; + } + size_t end = json_text.size(); + while (end > 0 && (json_text[end-1] == '\n' || json_text[end-1] == '\r' || + json_text[end-1] == ' ' || json_text[end-1] == '\t')) + --end; + size_t start = end > 0 ? json_text.rfind('\n', end - 1) : std::string::npos; + start = (start == std::string::npos) ? 
0 : start + 1; + const char* p = json_text.data() + start; + const char* e = json_text.data() + end; + p = skip_ws(p, e); + if (!parse_object_paste_extract(p, e, res)) { + res->error = "json parse error"; + return false; + } + return res->error.empty(); +} + +bool extract_panel_run_subprocess(const std::string& text, + bool use_hybrid, + ExtractResult* out) +{ + if (!out) return false; + out->entities.clear(); + out->relations.clear(); + out->layers.clear(); + out->error.clear(); + out->stderr_tail.clear(); + + std::string script; + if (!g_mod.enrichers_dir.empty()) { + script = g_mod.enrichers_dir + "/paste_extract/run.py"; + } + if (script.empty() || !file_exists(script)) { + out->error = "paste_extract/run.py not found"; + return false; + } + std::string stdin_payload = build_stdin_payload(text, use_hybrid); + std::string stdout_buf; + std::string stderr_tail; + int rc = -1; + if (!spawn_python_blocking(script, stdin_payload, &stdout_buf, + &stderr_tail, &rc)) { + out->error = "spawn failed"; + out->stderr_tail = std::move(stderr_tail); + return false; + } + out->stderr_tail = std::move(stderr_tail); + if (rc != 0) { + char b[64]; + std::snprintf(b, sizeof(b), "exit %d", rc); + out->error = b; + return false; + } + if (!extract_panel_parse_result(stdout_buf, out)) { + return false; + } + return true; +} + +bool extract_panel_apply(const char* ops_db_path, + const ExtractResult& result, + int* out_added_entities, + int* out_dedup_entities, + int* out_added_relations, + int* out_skipped_relations) +{ + if (out_added_entities) *out_added_entities = 0; + if (out_dedup_entities) *out_dedup_entities = 0; + if (out_added_relations) *out_added_relations = 0; + if (out_skipped_relations) *out_skipped_relations = 0; + if (!ops_db_path || !*ops_db_path) return false; + + sqlite3* db = nullptr; + if (sqlite3_open_v2(ops_db_path, &db, SQLITE_OPEN_READWRITE, nullptr) + != SQLITE_OK) { + if (db) sqlite3_close(db); + return false; + } + sqlite3_exec(db, "BEGIN", nullptr, 
nullptr, nullptr); + + std::string ts = now_iso_local(); + std::unordered_map map_id; + map_id.reserve(result.entities.size()); + + int idx = 0; + int added_e = 0, dedup_e = 0; + for (const auto& e : result.entities) { + if (!e.selected) { ++idx; continue; } + std::string type_ref = e.type_buf[0] ? std::string(e.type_buf) : e.type_ref; + std::string name = e.name_buf[0] ? std::string(e.name_buf) : e.name; + if (type_ref.empty() || name.empty()) { ++idx; continue; } + std::string sql_id; + if (find_existing_entity(db, type_ref, name, &sql_id)) { + map_id[e.tmp_id] = sql_id; + ++dedup_e; + } else { + char id_buf[96]; + std::snprintf(id_buf, sizeof(id_buf), "%s_%lld_%d", + type_ref.c_str(), now_ms_local(), idx); + std::string new_id = id_buf; + if (!insert_entity_with_metadata(db, new_id, name, type_ref, + e.metadata_json, ts)) { + sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr); + sqlite3_close(db); + return false; + } + map_id[e.tmp_id] = new_id; + ++added_e; + } + ++idx; + } + + int added_r = 0, skipped_r = 0; + int ridx = 0; + for (const auto& r : result.relations) { + ++ridx; + if (!r.selected) continue; + auto it_from = map_id.find(r.from_tmp_id); + auto it_to = map_id.find(r.to_tmp_id); + if (it_from == map_id.end() || it_to == map_id.end()) { + ++skipped_r; continue; + } + std::string name = r.name.empty() ? 
std::string("RELATED_TO") : r.name; + if (relation_exists(db, it_from->second, it_to->second, name)) { + ++skipped_r; continue; + } + char id_buf[96]; + std::snprintf(id_buf, sizeof(id_buf), "rel_%lld_%d", + now_ms_local(), ridx); + if (!insert_relation_simple(db, id_buf, name, + it_from->second, it_to->second, ts)) { + sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr); + sqlite3_close(db); + return false; + } + ++added_r; + } + + sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr); + sqlite3_close(db); + + if (out_added_entities) *out_added_entities = added_e; + if (out_dedup_entities) *out_dedup_entities = dedup_e; + if (out_added_relations) *out_added_relations = added_r; + if (out_skipped_relations) *out_skipped_relations = skipped_r; + return true; +} + +// --------------------------------------------------------------------------- +// Render (ImGui) +// --------------------------------------------------------------------------- + +namespace { + +void launch_extract_async(ExtractPanelState& s, bool use_hybrid) { + if (s.busy.load()) return; + if (s.worker.joinable()) s.worker.join(); // por si hubo uno previo + + // Snapshot del texto (el buffer puede mutar mientras corre). 
+ std::string text(s.text_buf.data()); + s.busy.store(true); + s.status = "Extracting..."; + s.new_result.store(false); + + s.worker = std::thread([&s, text, use_hybrid]() { + auto r = std::make_shared(); + bool ok = extract_panel_run_subprocess(text, use_hybrid, r.get()); + { + std::lock_guard lk(s.result_mu); + s.result = r; + char buf[128]; + if (ok) { + std::snprintf(buf, sizeof(buf), + "OK — %zu entities, %zu relations", + r->entities.size(), r->relations.size()); + } else { + std::snprintf(buf, sizeof(buf), "ERROR: %s", + r->error.c_str()); + } + s.status = buf; + } + s.new_result.store(true); + s.busy.store(false); + }); +} + +} // namespace + +void extract_panel_render(AppState& app) { + if (!app.panel_extract) return; + + ExtractPanelState& s = panel_state(); + + if (!ImGui::Begin("Extract", &app.panel_extract)) { + ImGui::End(); + return; + } + + // Top bar: hybrid toggle + Extract button + status. + ImGui::Checkbox("Use hybrid (GLiNER/GLiREL)", &s.use_hybrid); + ImGui::SameLine(); + bool busy = s.busy.load(); + ImGui::BeginDisabled(busy || s.text_buf.size() < 2 || s.text_buf[0] == 0); + if (ImGui::Button(busy ? "Extracting..." : (TI_BOLT " Extract"))) { + launch_extract_async(s, s.use_hybrid); + } + ImGui::EndDisabled(); + ImGui::SameLine(); + ImGui::TextDisabled("%s", s.status.c_str()); + + // Multi-line text input. Crece dinamicamente si el usuario pega un + // texto largo (ImGuiInputTextFlags_CallbackResize). + auto resize_cb = [](ImGuiInputTextCallbackData* data) -> int { + if (data->EventFlag == ImGuiInputTextFlags_CallbackResize) { + auto* buf = (std::vector*)data->UserData; + buf->resize(data->BufTextLen + 1); + data->Buf = buf->data(); + } + return 0; + }; + + ImVec2 input_size(-1.0f, ImGui::GetContentRegionAvail().y * 0.45f); + ImGui::InputTextMultiline("##paste_text", + s.text_buf.data(), s.text_buf.size(), + input_size, + ImGuiInputTextFlags_CallbackResize, resize_cb, &s.text_buf); + + ImGui::Separator(); + + // Tablas de propuestas. 
+ std::shared_ptr res; + { + std::lock_guard lk(s.result_mu); + res = s.result; + } + if (!res) { + ImGui::TextDisabled("Pega texto y pulsa Extract para ver propuestas."); + ImGui::End(); + return; + } + if (!res->error.empty()) { + ImGui::TextColored(ImVec4(0.95f,0.45f,0.45f,1.0f), + "Error: %s", res->error.c_str()); + if (!res->stderr_tail.empty()) { + ImGui::TextWrapped("%s", res->stderr_tail.c_str()); + } + ImGui::End(); + return; + } + + if (!res->layers.empty()) { + std::string layers = "Layers: "; + for (size_t i = 0; i < res->layers.size(); ++i) { + if (i) layers += ", "; + layers += res->layers[i]; + } + ImGui::TextDisabled("%s", layers.c_str()); + } + + // Toolbar para select-all / none. + if (ImGui::SmallButton("All")) { + for (auto& e : res->entities) e.selected = true; + for (auto& r : res->relations) r.selected = true; + } + ImGui::SameLine(); + if (ImGui::SmallButton("None")) { + for (auto& e : res->entities) e.selected = false; + for (auto& r : res->relations) r.selected = false; + } + ImGui::SameLine(); + int sel_e = 0, sel_r = 0; + for (const auto& e : res->entities) if (e.selected) ++sel_e; + for (const auto& r : res->relations) if (r.selected) ++sel_r; + ImGui::TextDisabled("Selected: %d entities, %d relations", sel_e, sel_r); + + // Apply Selected. 
+ ImGui::SameLine(); + bool can_apply = (sel_e + sel_r) > 0 && !app.input_db_path.empty(); + ImGui::BeginDisabled(!can_apply); + if (ImGui::Button(TI_CHECK " Apply Selected")) { + int ae=0, de=0, ar=0, sr=0; + bool ok = extract_panel_apply(app.input_db_path.c_str(), + *res, &ae, &de, &ar, &sr); + if (ok) { + s.last_apply_entities = ae; + s.last_apply_relations = ar; + s.last_apply_dedup = de; + char buf[160]; + std::snprintf(buf, sizeof(buf), + "Applied: +%d entities, +%d relations (%d deduped, %d skipped)", + ae, ar, de, sr); + s.status = buf; + app.want_reload = true; + } else { + s.status = "Apply failed"; + } + } + ImGui::EndDisabled(); + if (app.input_db_path.empty()) { + ImGui::SameLine(); + ImGui::TextColored(ImVec4(1.0f,0.7f,0.3f,1.0f), + "(no operations.db loaded)"); + } + + // Tabla de entidades. + if (!res->entities.empty() && + ImGui::CollapsingHeader("Entities", ImGuiTreeNodeFlags_DefaultOpen)) { + if (ImGui::BeginTable("##ents", 5, + ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg | + ImGuiTableFlags_ScrollY, + ImVec2(0.0f, 200.0f))) { + ImGui::TableSetupColumn("", ImGuiTableColumnFlags_WidthFixed, 28.0f); + ImGui::TableSetupColumn("Type", ImGuiTableColumnFlags_WidthFixed, 120.0f); + ImGui::TableSetupColumn("Name"); + ImGui::TableSetupColumn("Span", ImGuiTableColumnFlags_WidthFixed, 90.0f); + ImGui::TableSetupColumn("Src", ImGuiTableColumnFlags_WidthFixed, 60.0f); + ImGui::TableHeadersRow(); + + for (size_t i = 0; i < res->entities.size(); ++i) { + auto& e = res->entities[i]; + ImGui::TableNextRow(); + ImGui::PushID((int)i); + + ImGui::TableNextColumn(); + ImGui::Checkbox("##sel", &e.selected); + + ImGui::TableNextColumn(); + ImGui::SetNextItemWidth(-1); + ImGui::InputText("##type", e.type_buf, sizeof(e.type_buf)); + + ImGui::TableNextColumn(); + ImGui::SetNextItemWidth(-1); + ImGui::InputText("##name", e.name_buf, sizeof(e.name_buf)); + + ImGui::TableNextColumn(); + if (e.start_offset >= 0) { + ImGui::Text("%d-%d", e.start_offset, e.end_offset); + } 
else { + ImGui::TextDisabled("—"); + } + + ImGui::TableNextColumn(); + ImGui::TextDisabled("%s", e.source.c_str()); + + ImGui::PopID(); + } + ImGui::EndTable(); + } + } + + // Tabla de relaciones. + if (!res->relations.empty() && + ImGui::CollapsingHeader("Relations", ImGuiTreeNodeFlags_DefaultOpen)) { + if (ImGui::BeginTable("##rels", 5, + ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg | + ImGuiTableFlags_ScrollY, + ImVec2(0.0f, 160.0f))) { + ImGui::TableSetupColumn("", ImGuiTableColumnFlags_WidthFixed, 28.0f); + ImGui::TableSetupColumn("From", ImGuiTableColumnFlags_WidthFixed, 100.0f); + ImGui::TableSetupColumn("Name"); + ImGui::TableSetupColumn("To", ImGuiTableColumnFlags_WidthFixed, 100.0f); + ImGui::TableSetupColumn("Conf", ImGuiTableColumnFlags_WidthFixed, 60.0f); + ImGui::TableHeadersRow(); + + // Lookup helper: tmp_id -> entity name (para mostrar en From/To). + auto entity_label = [&](const std::string& tmp) -> std::string { + for (const auto& e : res->entities) { + if (e.tmp_id == tmp) { + std::string n = e.name_buf[0] ? 
e.name_buf : e.name; + if (n.size() > 18) n = n.substr(0, 15) + "..."; + return n; + } + } + return tmp; + }; + + for (size_t i = 0; i < res->relations.size(); ++i) { + auto& r = res->relations[i]; + ImGui::TableNextRow(); + ImGui::PushID(2000 + (int)i); + + ImGui::TableNextColumn(); + ImGui::Checkbox("##sel", &r.selected); + + ImGui::TableNextColumn(); + ImGui::TextUnformatted(entity_label(r.from_tmp_id).c_str()); + + ImGui::TableNextColumn(); + ImGui::TextUnformatted(r.name.c_str()); + + ImGui::TableNextColumn(); + ImGui::TextUnformatted(entity_label(r.to_tmp_id).c_str()); + + ImGui::TableNextColumn(); + ImGui::Text("%.2f", r.confidence); + + ImGui::PopID(); + } + ImGui::EndTable(); + } + } + + ImGui::End(); +} + +} // namespace ge diff --git a/extract_panel.h b/extract_panel.h new file mode 100644 index 0000000..19d4601 --- /dev/null +++ b/extract_panel.h @@ -0,0 +1,139 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +// Panel "Paste & Extract" (issue 0013). +// +// Textarea grande para pegar texto. Boton Extract lanza el script +// `enrichers/paste_extract/run.py` en un hilo aparte (no bloquea UI). +// El script devuelve un JSON con entidades y relaciones propuestas (modo +// preview — no escribe a operations.db). El panel muestra dos tablas +// (entidades / relaciones) con checkboxes; al pulsar "Apply Selected" +// se persisten via entity_ops con dedupe por (type_ref, name). +// +// Threading: una llamada Extract a la vez (extract_busy bool). El hilo +// rellena la propuesta tras hacerse el subprocess. Apply corre en el +// thread principal y dispara reload del grafo via app.want_reload. +// +// El enricher esta declarado en `enrichers/paste_extract/manifest.yaml` +// pero NO se invoca via el sistema de jobs — el panel lo lanza +// directamente. Vivir en `enrichers/` permite que se distribuya y que +// el script use el mismo Python runtime resolution que el resto. 
+ +namespace ge { + +struct AppState; + +// Una entidad propuesta por el extractor. Se guarda como string para +// poder editarla inline antes del Apply. +struct ProposedEntity { + std::string tmp_id; // "tmp_0", "tmp_1", ... vinculado a relaciones + std::string type_ref; // editable + std::string name; // editable + std::string source; // "regex" | "hybrid" + int start_offset = -1; // span en el texto pegado + int end_offset = -1; + double confidence = 1.0; + std::string metadata_json; // JSON literal (no editable v1) + bool selected = true; + + // Buffers mutables para edicion inline en ImGui. + char type_buf[64] = {}; + char name_buf[256] = {}; +}; + +struct ProposedRelation { + std::string from_tmp_id; + std::string to_tmp_id; + std::string name; // ej: "works_at" + std::string source; // "hybrid" | ... + double confidence = 0.0; + bool selected = true; +}; + +struct ExtractResult { + std::vector entities; + std::vector relations; + std::vector layers; + std::string error; // vacio si OK + std::string stderr_tail; +}; + +struct ExtractPanelState { + // Buffer de texto del textarea. Crece dinamicamente. + std::vector text_buf; + bool text_initialized = false; + + // Resultado del ultimo Extract (poblado por el worker thread). + std::shared_ptr result; + std::mutex result_mu; + std::atomic busy{false}; + std::atomic new_result{false}; // hay resultado fresco + + // Mensaje de status (en el footer) — refrescado por el worker. + std::string status; + + // Stats del ultimo apply. + int last_apply_entities = 0; + int last_apply_relations = 0; + int last_apply_dedup = 0; + + // Toggle: ¿usar hybrid (GLiNER/GLiREL) si esta disponible? + bool use_hybrid = false; + + // Worker thread; joinable cuando esta vivo. + std::thread worker; +}; + +// Configura paths que el worker necesita para invocar Python. Llamar una +// vez tras `jobs_init` (re-usa el resolver de Python runtime + paths). 
void extract_panel_init(const char* enrichers_dir,
                        const char* app_dir,
                        const char* registry_root);

// Forward declarations (redundant but harmless when the full definitions are
// already visible) so the prototypes below are self-contained.
struct AppState;
struct ExtractResult;

// Releases the worker thread if it is running (cancelable). Call on app
// shutdown.
void extract_panel_shutdown();

// Renders the panel. If app.panel_extract is false, returns without drawing.
void extract_panel_render(AppState& app);

// Applies the entities/relations marked as selected to the given
// operations.db. Inserts new entities with dedupe by (type_ref, name),
// reusing the existing id when one is found. Then inserts the relations
// whose endpoints (mapped via tmp_id -> real_id) are both valid.
//
// The counts are returned through out_added_entities, out_dedup_entities and
// out_added_relations. Relations that fail to resolve are tolerated and
// counted in out_skipped_relations. The return value tells the caller
// whether the apply succeeded (the panel reports "Apply failed" on false);
// the caller decides whether to set app.want_reload.
//
// This function can be tested in isolation (it does not touch ImGui).
bool extract_panel_apply(const char* ops_db_path,
                         const ExtractResult& result,
                         int* out_added_entities,
                         int* out_dedup_entities,
                         int* out_added_relations,
                         int* out_skipped_relations);

// Internal helper exposed for tests: parses the JSON produced by
// `enrichers/paste_extract/run.py`. Returns true when parsing succeeds.
// On error, result.error is filled in.
bool extract_panel_parse_result(const std::string& json_text,
                                ExtractResult* result);

// Spawns the Python subprocess that performs the extraction. Synchronous
// (blocks the caller's thread). The panel invokes it on a separate
// std::thread so the UI does not freeze. Exposed in case tests want to call
// it directly (not for now - the tests cover the Python side via pytest,
// and the C++ side via parse_result + apply).
+bool extract_panel_run_subprocess(const std::string& text, + bool use_hybrid, + ExtractResult* out); + +} // namespace ge diff --git a/views.h b/views.h index da3ab19..e83015d 100644 --- a/views.h +++ b/views.h @@ -62,6 +62,7 @@ struct AppState { bool panel_note = false; bool panel_jobs = false; // issue 0026 bool panel_chat = false; // claude -p chat (issue 0001) + bool panel_extract = false; // paste & extract (issue 0013) bool show_filters_modal = false; bool show_open_modal = false;