fdc6b91f4d
extract_panel.{h,cpp}: panel ImGui dockeable con textarea grande,
botón Extract que lanza enrichers/paste_extract/run.py en un
std::thread aparte (no bloquea UI), tablas editables de entidades y
relaciones propuestas con checkboxes, y botón Apply Selected que
persiste a operations.db con dedupe por (type_ref, name) y por
(from, to, name) en relaciones.
Parser JSON ad-hoc (suficiente para el contrato del enricher) para
no añadir dependencias. Apply usa SQLite directamente (mismo
patrón que entity_ops/jobs.cpp).
Añade panel_extract a AppState. La lógica apply está separada de
ImGui para poder testarla en aislamiento desde pytest.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1080 lines
38 KiB
C++
1080 lines
38 KiB
C++
#include "extract_panel.h"
|
|
#include "views.h"
|
|
#include "entity_ops.h"
|
|
|
|
#include "imgui.h"
|
|
#include "core/icons_tabler.h"
|
|
#include "core/tokens.h"
|
|
|
|
#include "../../../../cpp/vendor/sqlite3/sqlite3.h"
|
|
|
|
#include <atomic>
|
|
#include <chrono>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <ctime>
|
|
#include <filesystem>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <sys/stat.h>
|
|
#include <thread>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
#ifdef _WIN32
|
|
#ifndef WIN32_LEAN_AND_MEAN
|
|
#define WIN32_LEAN_AND_MEAN
|
|
#endif
|
|
#include <windows.h>
|
|
#else
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <signal.h>
|
|
#include <sys/types.h>
|
|
#include <sys/wait.h>
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
namespace ge {
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Estado del modulo
|
|
// ---------------------------------------------------------------------------
|
|
|
|
namespace {
|
|
|
|
struct ModuleState {
|
|
std::string enrichers_dir;
|
|
std::string app_dir;
|
|
std::string registry_root;
|
|
};
|
|
|
|
ModuleState g_mod;
|
|
ExtractPanelState* g_panel = nullptr;
|
|
|
|
/// Returns the module's panel state, creating it on first use with a
/// zero-filled 8192-byte text buffer.
ExtractPanelState& panel_state() {
    if (g_panel != nullptr) {
        return *g_panel;
    }
    g_panel = new ExtractPanelState();
    ExtractPanelState& fresh = *g_panel;
    fresh.text_buf.assign(8192, 0);
    fresh.text_initialized = true;
    return fresh;
}
|
|
|
|
/// True when `p` is non-empty and stat() succeeds on something that is not a
/// directory.
bool file_exists(const std::string& p) {
    if (p.empty()) {
        return false;
    }
    struct stat info{};
    if (stat(p.c_str(), &info) != 0) {
        return false;
    }
    return !S_ISDIR(info.st_mode);
}
|
|
|
|
/// Milliseconds elapsed since the std::chrono::system_clock epoch.
long long now_ms_local() {
    const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::milliseconds>(since_epoch).count();
}
|
|
|
|
/// Current time formatted as a UTC timestamp "YYYY-MM-DDTHH:MM:SSZ".
std::string now_iso_local() {
    const std::time_t now = std::time(nullptr);
    std::tm utc{};
#ifdef _WIN32
    gmtime_s(&utc, &now);
#else
    gmtime_r(&now, &utc);
#endif
    char stamp[32];
    std::strftime(stamp, sizeof(stamp), "%Y-%m-%dT%H:%M:%SZ", &utc);
    return std::string(stamp);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// JSON parser minimo
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Advances past spaces, tabs, LF and CR; returns the first other position,
/// or `end` when only whitespace remains.
const char* skip_ws(const char* p, const char* end) {
    for (; p < end; ++p) {
        switch (*p) {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                continue;  // whitespace: keep scanning
            default:
                return p;
        }
    }
    return p;
}
|
|
|
|
/// Reads exactly four hexadecimal digits starting at `p` into `*value`.
/// Returns false when fewer than four bytes remain or a digit is invalid.
/// `p` itself is not advanced.
bool parse_hex4(const char* p, const char* end, unsigned* value) {
    if (end - p < 4) return false;
    unsigned v = 0;
    for (int i = 0; i < 4; ++i) {
        const char h = p[i];
        v <<= 4;
        if (h >= '0' && h <= '9')      v |= static_cast<unsigned>(h - '0');
        else if (h >= 'a' && h <= 'f') v |= static_cast<unsigned>(h - 'a' + 10);
        else if (h >= 'A' && h <= 'F') v |= static_cast<unsigned>(h - 'A' + 10);
        else return false;
    }
    *value = v;
    return true;
}

/// Appends the UTF-8 encoding of code point `cp` (up to U+10FFFF) to `out`.
void append_utf8(unsigned cp, std::string* out) {
    if (cp < 0x80) {
        out->push_back(static_cast<char>(cp));
    } else if (cp < 0x800) {
        out->push_back(static_cast<char>(0xC0 | (cp >> 6)));
        out->push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else if (cp < 0x10000) {
        out->push_back(static_cast<char>(0xE0 | (cp >> 12)));
        out->push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out->push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else {
        out->push_back(static_cast<char>(0xF0 | (cp >> 18)));
        out->push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
        out->push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out->push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    }
}

/// Parses a JSON string literal starting at the opening quote `*p`.
///
/// On success `p` points just past the closing quote and `*out` holds the
/// decoded text as UTF-8. Returns false when `p` is not at a quote, an escape
/// is truncated or malformed, or the closing quote is missing.
///
/// `\uXXXX` escapes are decoded; a high surrogate followed by a `\uXXXX` low
/// surrogate is combined into one supplementary code point (4-byte UTF-8).
/// A surrogate without its partner is replaced by U+FFFD so the output is
/// always valid UTF-8. Unknown escapes are copied through as the escaped
/// character.
bool parse_string(const char*& p, const char* end, std::string* out) {
    if (p >= end || *p != '"') return false;
    ++p;
    out->clear();
    while (p < end) {
        const char c = *p++;
        if (c == '"') return true;
        if (c != '\\') {
            out->push_back(c);
            continue;
        }
        if (p >= end) return false;
        const char esc = *p++;
        switch (esc) {
            case '"':  out->push_back('"');  break;
            case '\\': out->push_back('\\'); break;
            case '/':  out->push_back('/');  break;
            case 'b':  out->push_back('\b'); break;
            case 'f':  out->push_back('\f'); break;
            case 'n':  out->push_back('\n'); break;
            case 'r':  out->push_back('\r'); break;
            case 't':  out->push_back('\t'); break;
            case 'u': {
                unsigned cp = 0;
                if (!parse_hex4(p, end, &cp)) return false;
                p += 4;
                if (cp >= 0xD800 && cp <= 0xDBFF) {
                    // High surrogate: only meaningful together with a
                    // following "\uDC00".."\uDFFF" low surrogate.
                    unsigned low = 0;
                    if (end - p >= 6 && p[0] == '\\' && p[1] == 'u' &&
                        parse_hex4(p + 2, end, &low) &&
                        low >= 0xDC00 && low <= 0xDFFF) {
                        p += 6;
                        cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                    } else {
                        cp = 0xFFFD;  // unpaired high surrogate
                    }
                } else if (cp >= 0xDC00 && cp <= 0xDFFF) {
                    cp = 0xFFFD;      // unpaired low surrogate
                }
                append_utf8(cp, out);
                break;
            }
            default:
                out->push_back(esc);
                break;
        }
    }
    return false;  // ran out of input before the closing quote
}
|
|
|
|
bool skip_value(const char*& p, const char* end, std::string* lit_out = nullptr) {
|
|
p = skip_ws(p, end);
|
|
if (p >= end) return false;
|
|
const char* start = p;
|
|
if (*p == '"') {
|
|
std::string tmp;
|
|
bool ok = parse_string(p, end, &tmp);
|
|
if (ok && lit_out) { lit_out->assign("\""); *lit_out += tmp; *lit_out += "\""; }
|
|
return ok;
|
|
}
|
|
if (*p == '{' || *p == '[') {
|
|
char open = *p, close = (open == '{' ? '}' : ']');
|
|
int depth = 0;
|
|
bool in_str = false;
|
|
const char* obj_start = p;
|
|
while (p < end) {
|
|
char c = *p++;
|
|
if (in_str) {
|
|
if (c == '\\' && p < end) { ++p; continue; }
|
|
if (c == '"') in_str = false;
|
|
continue;
|
|
}
|
|
if (c == '"') { in_str = true; continue; }
|
|
if (c == open) ++depth;
|
|
else if (c == close) { --depth; if (depth == 0) break; }
|
|
}
|
|
if (depth != 0) return false;
|
|
if (lit_out) lit_out->assign(obj_start, p);
|
|
return true;
|
|
}
|
|
while (p < end) {
|
|
char c = *p;
|
|
if (c == ',' || c == '}' || c == ']' ||
|
|
c == ' ' || c == '\t' || c == '\n' || c == '\r') break;
|
|
++p;
|
|
}
|
|
if (lit_out) lit_out->assign(start, p);
|
|
return true;
|
|
}
|
|
|
|
/// Skips whitespace, then consumes `c` if it is the next character.
/// Returns whether the character was found; `p` is left after the skipped
/// whitespace when it was not.
bool expect_char(const char*& p, const char* end, char c) {
    p = skip_ws(p, end);
    const bool matched = (p < end) && (*p == c);
    if (matched) {
        ++p;
    }
    return matched;
}
|
|
|
|
bool parse_entities_array(const char*& p, const char* end,
|
|
std::vector<ProposedEntity>* out)
|
|
{
|
|
if (!expect_char(p, end, '[')) return false;
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == ']') { ++p; return true; }
|
|
while (p < end) {
|
|
if (!expect_char(p, end, '{')) return false;
|
|
ProposedEntity e;
|
|
e.confidence = 1.0;
|
|
while (p < end) {
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == '}') { ++p; break; }
|
|
std::string key;
|
|
if (!parse_string(p, end, &key)) return false;
|
|
if (!expect_char(p, end, ':')) return false;
|
|
p = skip_ws(p, end);
|
|
if (key == "metadata") {
|
|
std::string lit;
|
|
if (!skip_value(p, end, &lit)) return false;
|
|
e.metadata_json = std::move(lit);
|
|
} else if (p < end && *p == '"') {
|
|
std::string val;
|
|
if (!parse_string(p, end, &val)) return false;
|
|
if (key == "id") e.tmp_id = std::move(val);
|
|
else if (key == "type_ref") e.type_ref = std::move(val);
|
|
else if (key == "name") e.name = std::move(val);
|
|
else if (key == "source") e.source = std::move(val);
|
|
} else {
|
|
std::string lit;
|
|
if (!skip_value(p, end, &lit)) return false;
|
|
if (key == "start") e.start_offset = std::atoi(lit.c_str());
|
|
else if (key == "end") e.end_offset = std::atoi(lit.c_str());
|
|
else if (key == "confidence") e.confidence = std::atof(lit.c_str());
|
|
}
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == ',') { ++p; continue; }
|
|
}
|
|
std::snprintf(e.type_buf, sizeof(e.type_buf), "%s", e.type_ref.c_str());
|
|
std::snprintf(e.name_buf, sizeof(e.name_buf), "%s", e.name.c_str());
|
|
e.selected = true;
|
|
out->push_back(std::move(e));
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == ',') { ++p; continue; }
|
|
if (p < end && *p == ']') { ++p; return true; }
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool parse_relations_array(const char*& p, const char* end,
|
|
std::vector<ProposedRelation>* out)
|
|
{
|
|
if (!expect_char(p, end, '[')) return false;
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == ']') { ++p; return true; }
|
|
while (p < end) {
|
|
if (!expect_char(p, end, '{')) return false;
|
|
ProposedRelation r;
|
|
while (p < end) {
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == '}') { ++p; break; }
|
|
std::string key;
|
|
if (!parse_string(p, end, &key)) return false;
|
|
if (!expect_char(p, end, ':')) return false;
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == '"') {
|
|
std::string val;
|
|
if (!parse_string(p, end, &val)) return false;
|
|
if (key == "from_id") r.from_tmp_id = std::move(val);
|
|
else if (key == "to_id") r.to_tmp_id = std::move(val);
|
|
else if (key == "name") r.name = std::move(val);
|
|
else if (key == "source") r.source = std::move(val);
|
|
} else {
|
|
std::string lit;
|
|
if (!skip_value(p, end, &lit)) return false;
|
|
if (key == "confidence") r.confidence = std::atof(lit.c_str());
|
|
}
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == ',') { ++p; continue; }
|
|
}
|
|
r.selected = true;
|
|
out->push_back(std::move(r));
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == ',') { ++p; continue; }
|
|
if (p < end && *p == ']') { ++p; return true; }
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool parse_layers_array(const char*& p, const char* end,
|
|
std::vector<std::string>* out)
|
|
{
|
|
if (!expect_char(p, end, '[')) return false;
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == ']') { ++p; return true; }
|
|
while (p < end) {
|
|
std::string s;
|
|
if (!parse_string(p, end, &s)) return false;
|
|
out->push_back(std::move(s));
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == ',') { ++p; continue; }
|
|
if (p < end && *p == ']') { ++p; return true; }
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool parse_object_paste_extract(const char*& p, const char* end,
|
|
ExtractResult* res)
|
|
{
|
|
if (!expect_char(p, end, '{')) return false;
|
|
while (p < end) {
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == '}') { ++p; return true; }
|
|
std::string key;
|
|
if (!parse_string(p, end, &key)) return false;
|
|
if (!expect_char(p, end, ':')) return false;
|
|
p = skip_ws(p, end);
|
|
if (key == "entities") {
|
|
if (!parse_entities_array(p, end, &res->entities)) return false;
|
|
} else if (key == "relations") {
|
|
if (!parse_relations_array(p, end, &res->relations)) return false;
|
|
} else if (key == "stats") {
|
|
if (!expect_char(p, end, '{')) return false;
|
|
while (p < end) {
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == '}') { ++p; break; }
|
|
std::string sk;
|
|
if (!parse_string(p, end, &sk)) return false;
|
|
if (!expect_char(p, end, ':')) return false;
|
|
p = skip_ws(p, end);
|
|
if (sk == "layers") {
|
|
if (!parse_layers_array(p, end, &res->layers)) return false;
|
|
} else {
|
|
if (!skip_value(p, end, nullptr)) return false;
|
|
}
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == ',') { ++p; continue; }
|
|
}
|
|
} else if (key == "error") {
|
|
std::string s;
|
|
if (p < end && *p == '"') {
|
|
if (!parse_string(p, end, &s)) return false;
|
|
res->error = std::move(s);
|
|
} else {
|
|
if (!skip_value(p, end, nullptr)) return false;
|
|
}
|
|
} else {
|
|
if (!skip_value(p, end, nullptr)) return false;
|
|
}
|
|
p = skip_ws(p, end);
|
|
if (p < end && *p == ',') { ++p; continue; }
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Subprocess
|
|
// ---------------------------------------------------------------------------
|
|
|
|
std::string python_path() {
|
|
if (const char* env = std::getenv("FN_PYTHON"); env && *env && file_exists(env)) {
|
|
return env;
|
|
}
|
|
if (!g_mod.registry_root.empty()) {
|
|
std::string p = g_mod.registry_root + "/python/.venv/bin/python3";
|
|
#ifdef _WIN32
|
|
return p;
|
|
#else
|
|
if (file_exists(p)) return p;
|
|
#endif
|
|
}
|
|
#ifdef _WIN32
|
|
return "python.exe";
|
|
#else
|
|
return "python3";
|
|
#endif
|
|
}
|
|
|
|
/// Builds the JSON document written to the enricher's stdin.
///
/// Most context fields are sent empty; `app_dir` and `registry_root` come
/// from the module configuration, and `params` carries the pasted text plus
/// the `use_hybrid` flag. String values are escaped for quotes, backslashes
/// and control characters below 0x20.
std::string build_stdin_payload(const std::string& text, bool use_hybrid) {
    const auto json_escape = [](const std::string& raw) {
        std::string quoted;
        quoted.reserve(raw.size() + 8);
        for (const char ch : raw) {
            if (ch == '"') {
                quoted += "\\\"";
            } else if (ch == '\\') {
                quoted += "\\\\";
            } else if (ch == '\n') {
                quoted += "\\n";
            } else if (ch == '\r') {
                quoted += "\\r";
            } else if (ch == '\t') {
                quoted += "\\t";
            } else if ((unsigned char)ch < 0x20) {
                char hex[8];
                std::snprintf(hex, sizeof(hex), "\\u%04x", (unsigned char)ch);
                quoted += hex;
            } else {
                quoted += ch;
            }
        }
        return quoted;
    };

    std::string payload;
    payload += "{";
    payload += "\"node_id\":\"\",";
    payload += "\"node_name\":\"\",";
    payload += "\"node_type\":\"\",";
    payload += "\"metadata\":{},";
    payload += "\"ops_db_path\":\"\",";
    payload += "\"app_dir\":\"";
    payload += json_escape(g_mod.app_dir);
    payload += "\",";
    payload += "\"cache_dir\":\"\",";
    payload += "\"registry_root\":\"";
    payload += json_escape(g_mod.registry_root);
    payload += "\",";
    payload += "\"params\":{";
    payload += "\"text\":\"";
    payload += json_escape(text);
    payload += "\",";
    payload += "\"use_hybrid\":";
    payload += (use_hybrid ? "true" : "false");
    payload += "}";
    payload += "}";
    return payload;
}
|
|
|
|
#ifdef _WIN32
|
|
|
|
bool spawn_python_blocking(const std::string& script_path,
|
|
const std::string& stdin_payload,
|
|
std::string* stdout_buf,
|
|
std::string* stderr_tail,
|
|
int* exit_code)
|
|
{
|
|
SECURITY_ATTRIBUTES sa{}; sa.nLength = sizeof(sa); sa.bInheritHandle = TRUE;
|
|
HANDLE in_r=nullptr, in_w=nullptr, out_r=nullptr, out_w=nullptr,
|
|
err_r=nullptr, err_w=nullptr;
|
|
if (!CreatePipe(&in_r,&in_w,&sa,0) ||
|
|
!CreatePipe(&out_r,&out_w,&sa,0) ||
|
|
!CreatePipe(&err_r,&err_w,&sa,0)) return false;
|
|
SetHandleInformation(in_w, HANDLE_FLAG_INHERIT, 0);
|
|
SetHandleInformation(out_r, HANDLE_FLAG_INHERIT, 0);
|
|
SetHandleInformation(err_r, HANDLE_FLAG_INHERIT, 0);
|
|
|
|
std::string py = python_path();
|
|
std::string cmd = "\"" + py + "\" \"" + script_path + "\"";
|
|
int n = MultiByteToWideChar(CP_UTF8, 0, cmd.c_str(), (int)cmd.size(),
|
|
nullptr, 0);
|
|
std::wstring w(n, 0);
|
|
MultiByteToWideChar(CP_UTF8, 0, cmd.c_str(), (int)cmd.size(), w.data(), n);
|
|
std::vector<wchar_t> wbuf(w.begin(), w.end()); wbuf.push_back(0);
|
|
|
|
STARTUPINFOW si{}; si.cb = sizeof(si); si.dwFlags = STARTF_USESTDHANDLES;
|
|
si.hStdInput=in_r; si.hStdOutput=out_w; si.hStdError=err_w;
|
|
PROCESS_INFORMATION pi{};
|
|
BOOL ok = CreateProcessW(nullptr, wbuf.data(), nullptr, nullptr, TRUE,
|
|
CREATE_NO_WINDOW, nullptr, nullptr, &si, &pi);
|
|
CloseHandle(in_r); CloseHandle(out_w); CloseHandle(err_w);
|
|
if (!ok) {
|
|
if (stderr_tail) *stderr_tail = "CreateProcessW failed";
|
|
CloseHandle(in_w); CloseHandle(out_r); CloseHandle(err_r);
|
|
return false;
|
|
}
|
|
DWORD wn = 0;
|
|
WriteFile(in_w, stdin_payload.data(), (DWORD)stdin_payload.size(),
|
|
&wn, nullptr);
|
|
CloseHandle(in_w);
|
|
|
|
std::thread err_t([&]() {
|
|
char b[1024];
|
|
while (true) {
|
|
DWORD m = 0;
|
|
if (!ReadFile(err_r, b, sizeof(b), &m, nullptr) || m == 0) break;
|
|
if (stderr_tail) {
|
|
stderr_tail->append(b, (size_t)m);
|
|
if (stderr_tail->size() > 4096)
|
|
stderr_tail->erase(0, stderr_tail->size() - 4096);
|
|
}
|
|
}
|
|
});
|
|
{
|
|
char b[4096];
|
|
while (true) {
|
|
DWORD m = 0;
|
|
if (!ReadFile(out_r, b, sizeof(b), &m, nullptr) || m == 0) break;
|
|
if (stdout_buf) stdout_buf->append(b, (size_t)m);
|
|
if (stdout_buf && stdout_buf->size() > 1024 * 1024) break;
|
|
}
|
|
}
|
|
CloseHandle(out_r);
|
|
WaitForSingleObject(pi.hProcess, INFINITE);
|
|
DWORD ec = 0; GetExitCodeProcess(pi.hProcess, &ec);
|
|
if (exit_code) *exit_code = (int)ec;
|
|
err_t.join();
|
|
CloseHandle(err_r); CloseHandle(pi.hProcess); CloseHandle(pi.hThread);
|
|
return true;
|
|
}
|
|
|
|
#else
|
|
|
|
bool spawn_python_blocking(const std::string& script_path,
|
|
const std::string& stdin_payload,
|
|
std::string* stdout_buf,
|
|
std::string* stderr_tail,
|
|
int* exit_code)
|
|
{
|
|
int p_in[2]={-1,-1}, p_out[2]={-1,-1}, p_err[2]={-1,-1};
|
|
if (pipe(p_in) != 0 || pipe(p_out) != 0 || pipe(p_err) != 0) return false;
|
|
pid_t pid = fork();
|
|
if (pid < 0) return false;
|
|
if (pid == 0) {
|
|
dup2(p_in[0], 0); dup2(p_out[1], 1); dup2(p_err[1], 2);
|
|
close(p_in[0]); close(p_in[1]);
|
|
close(p_out[0]); close(p_out[1]);
|
|
close(p_err[0]); close(p_err[1]);
|
|
std::string py = python_path();
|
|
const char* argv[] = { py.c_str(), script_path.c_str(), nullptr };
|
|
execv(py.c_str(), (char* const*)argv);
|
|
execvp(py.c_str(), (char* const*)argv);
|
|
_exit(127);
|
|
}
|
|
close(p_in[0]); close(p_out[1]); close(p_err[1]);
|
|
if (!stdin_payload.empty()) {
|
|
size_t left = stdin_payload.size();
|
|
const char* p = stdin_payload.data();
|
|
while (left > 0) {
|
|
ssize_t n = write(p_in[1], p, left);
|
|
if (n < 0) { if (errno == EINTR) continue; break; }
|
|
p += n; left -= (size_t)n;
|
|
}
|
|
}
|
|
close(p_in[1]);
|
|
std::thread err_t([&]() {
|
|
char b[1024];
|
|
while (true) {
|
|
ssize_t n = read(p_err[0], b, sizeof(b));
|
|
if (n <= 0) break;
|
|
if (stderr_tail) {
|
|
stderr_tail->append(b, (size_t)n);
|
|
if (stderr_tail->size() > 4096)
|
|
stderr_tail->erase(0, stderr_tail->size() - 4096);
|
|
}
|
|
}
|
|
});
|
|
{
|
|
char b[4096];
|
|
while (true) {
|
|
ssize_t n = read(p_out[0], b, sizeof(b));
|
|
if (n <= 0) break;
|
|
if (stdout_buf) stdout_buf->append(b, (size_t)n);
|
|
if (stdout_buf && stdout_buf->size() > 1024 * 1024) break;
|
|
}
|
|
}
|
|
close(p_out[0]);
|
|
int status = 0;
|
|
waitpid(pid, &status, 0);
|
|
err_t.join();
|
|
close(p_err[0]);
|
|
if (exit_code) {
|
|
*exit_code = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
#endif
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Apply (lado SQLite)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Looks up an entity by (type_ref, name).
/// On a hit, stores its id in `*out_id` and returns true. Returns false when
/// no row matches or the statement cannot be prepared.
bool find_existing_entity(sqlite3* db, const std::string& type_ref,
                          const std::string& name, std::string* out_id)
{
    static const char* const kQuery =
        "SELECT id FROM entities WHERE type_ref = ? AND name = ? LIMIT 1";

    sqlite3_stmt* stmt = nullptr;
    if (sqlite3_prepare_v2(db, kQuery, -1, &stmt, nullptr) != SQLITE_OK) {
        return false;
    }
    sqlite3_bind_text(stmt, 1, type_ref.c_str(), -1, SQLITE_TRANSIENT);
    sqlite3_bind_text(stmt, 2, name.c_str(), -1, SQLITE_TRANSIENT);

    const unsigned char* id_text = nullptr;
    if (sqlite3_step(stmt) == SQLITE_ROW) {
        id_text = sqlite3_column_text(stmt, 0);
    }
    const bool hit = (id_text != nullptr);
    if (hit) {
        // Copy before finalize: the column pointer belongs to the statement.
        *out_id = (const char*)id_text;
    }
    sqlite3_finalize(stmt);
    return hit;
}
|
|
|
|
/// Inserts one row into `entities` with source 'panel:paste_extract'.
/// An empty `metadata_json` is stored as "{}"; `ts` is written to both
/// created_at and updated_at. Returns true when the insert completed.
bool insert_entity_with_metadata(sqlite3* db,
                                 const std::string& id,
                                 const std::string& name,
                                 const std::string& type_ref,
                                 const std::string& metadata_json,
                                 const std::string& ts)
{
    static const char* const kInsert =
        "INSERT INTO entities (id, name, type_ref, source, metadata, "
        "  created_at, updated_at) "
        "VALUES (?, ?, ?, 'panel:paste_extract', ?, ?, ?)";

    sqlite3_stmt* stmt = nullptr;
    if (sqlite3_prepare_v2(db, kInsert, -1, &stmt, nullptr) != SQLITE_OK) {
        return false;
    }
    const auto bind = [stmt](int slot, const std::string& value) {
        sqlite3_bind_text(stmt, slot, value.c_str(), -1, SQLITE_TRANSIENT);
    };

    std::string metadata = metadata_json;
    if (metadata.empty()) {
        metadata = "{}";
    }

    bind(1, id);
    bind(2, name);
    bind(3, type_ref);
    bind(4, metadata);
    bind(5, ts);
    bind(6, ts);

    const int rc = sqlite3_step(stmt);
    sqlite3_finalize(stmt);
    return rc == SQLITE_DONE;
}
|
|
|
|
/// Reports whether a relation (from_id, to_id, name) is already stored.
/// Also returns false when the statement cannot be prepared.
bool relation_exists(sqlite3* db, const std::string& from_id,
                     const std::string& to_id, const std::string& name)
{
    static const char* const kQuery =
        "SELECT 1 FROM relations WHERE from_entity = ? AND to_entity = ? "
        "AND name = ? LIMIT 1";

    sqlite3_stmt* stmt = nullptr;
    if (sqlite3_prepare_v2(db, kQuery, -1, &stmt, nullptr) != SQLITE_OK) {
        return false;
    }
    const std::string* const args[] = { &from_id, &to_id, &name };
    int slot = 1;
    for (const std::string* arg : args) {
        sqlite3_bind_text(stmt, slot++, arg->c_str(), -1, SQLITE_TRANSIENT);
    }

    const int rc = sqlite3_step(stmt);
    sqlite3_finalize(stmt);
    return rc == SQLITE_ROW;
}
|
|
|
|
/// Inserts one row into `relations`; `ts` is written to both created_at and
/// updated_at. Returns true when the insert completed.
bool insert_relation_simple(sqlite3* db, const std::string& id,
                            const std::string& name,
                            const std::string& from_id,
                            const std::string& to_id,
                            const std::string& ts)
{
    static const char* const kInsert =
        "INSERT INTO relations (id, name, from_entity, to_entity, "
        "  created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?)";

    sqlite3_stmt* stmt = nullptr;
    if (sqlite3_prepare_v2(db, kInsert, -1, &stmt, nullptr) != SQLITE_OK) {
        return false;
    }
    const std::string* const args[] = { &id, &name, &from_id, &to_id, &ts, &ts };
    int slot = 1;
    for (const std::string* arg : args) {
        sqlite3_bind_text(stmt, slot++, arg->c_str(), -1, SQLITE_TRANSIENT);
    }

    const int rc = sqlite3_step(stmt);
    sqlite3_finalize(stmt);
    return rc == SQLITE_DONE;
}
|
|
|
|
} // namespace
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// API publica
|
|
// ---------------------------------------------------------------------------
|
|
|
|
void extract_panel_init(const char* enrichers_dir,
|
|
const char* app_dir,
|
|
const char* registry_root)
|
|
{
|
|
g_mod.enrichers_dir = enrichers_dir ? enrichers_dir : "";
|
|
g_mod.app_dir = app_dir ? app_dir : "";
|
|
g_mod.registry_root = registry_root ? registry_root : "";
|
|
}
|
|
|
|
void extract_panel_shutdown() {
|
|
if (!g_panel) return;
|
|
if (g_panel->worker.joinable()) g_panel->worker.join();
|
|
delete g_panel;
|
|
g_panel = nullptr;
|
|
}
|
|
|
|
bool extract_panel_parse_result(const std::string& json_text,
|
|
ExtractResult* res)
|
|
{
|
|
if (!res) return false;
|
|
res->entities.clear();
|
|
res->relations.clear();
|
|
res->layers.clear();
|
|
res->error.clear();
|
|
if (json_text.empty()) {
|
|
res->error = "empty result";
|
|
return false;
|
|
}
|
|
size_t end = json_text.size();
|
|
while (end > 0 && (json_text[end-1] == '\n' || json_text[end-1] == '\r' ||
|
|
json_text[end-1] == ' ' || json_text[end-1] == '\t'))
|
|
--end;
|
|
size_t start = end > 0 ? json_text.rfind('\n', end - 1) : std::string::npos;
|
|
start = (start == std::string::npos) ? 0 : start + 1;
|
|
const char* p = json_text.data() + start;
|
|
const char* e = json_text.data() + end;
|
|
p = skip_ws(p, e);
|
|
if (!parse_object_paste_extract(p, e, res)) {
|
|
res->error = "json parse error";
|
|
return false;
|
|
}
|
|
return res->error.empty();
|
|
}
|
|
|
|
bool extract_panel_run_subprocess(const std::string& text,
|
|
bool use_hybrid,
|
|
ExtractResult* out)
|
|
{
|
|
if (!out) return false;
|
|
out->entities.clear();
|
|
out->relations.clear();
|
|
out->layers.clear();
|
|
out->error.clear();
|
|
out->stderr_tail.clear();
|
|
|
|
std::string script;
|
|
if (!g_mod.enrichers_dir.empty()) {
|
|
script = g_mod.enrichers_dir + "/paste_extract/run.py";
|
|
}
|
|
if (script.empty() || !file_exists(script)) {
|
|
out->error = "paste_extract/run.py not found";
|
|
return false;
|
|
}
|
|
std::string stdin_payload = build_stdin_payload(text, use_hybrid);
|
|
std::string stdout_buf;
|
|
std::string stderr_tail;
|
|
int rc = -1;
|
|
if (!spawn_python_blocking(script, stdin_payload, &stdout_buf,
|
|
&stderr_tail, &rc)) {
|
|
out->error = "spawn failed";
|
|
out->stderr_tail = std::move(stderr_tail);
|
|
return false;
|
|
}
|
|
out->stderr_tail = std::move(stderr_tail);
|
|
if (rc != 0) {
|
|
char b[64];
|
|
std::snprintf(b, sizeof(b), "exit %d", rc);
|
|
out->error = b;
|
|
return false;
|
|
}
|
|
if (!extract_panel_parse_result(stdout_buf, out)) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool extract_panel_apply(const char* ops_db_path,
|
|
const ExtractResult& result,
|
|
int* out_added_entities,
|
|
int* out_dedup_entities,
|
|
int* out_added_relations,
|
|
int* out_skipped_relations)
|
|
{
|
|
if (out_added_entities) *out_added_entities = 0;
|
|
if (out_dedup_entities) *out_dedup_entities = 0;
|
|
if (out_added_relations) *out_added_relations = 0;
|
|
if (out_skipped_relations) *out_skipped_relations = 0;
|
|
if (!ops_db_path || !*ops_db_path) return false;
|
|
|
|
sqlite3* db = nullptr;
|
|
if (sqlite3_open_v2(ops_db_path, &db, SQLITE_OPEN_READWRITE, nullptr)
|
|
!= SQLITE_OK) {
|
|
if (db) sqlite3_close(db);
|
|
return false;
|
|
}
|
|
sqlite3_exec(db, "BEGIN", nullptr, nullptr, nullptr);
|
|
|
|
std::string ts = now_iso_local();
|
|
std::unordered_map<std::string, std::string> map_id;
|
|
map_id.reserve(result.entities.size());
|
|
|
|
int idx = 0;
|
|
int added_e = 0, dedup_e = 0;
|
|
for (const auto& e : result.entities) {
|
|
if (!e.selected) { ++idx; continue; }
|
|
std::string type_ref = e.type_buf[0] ? std::string(e.type_buf) : e.type_ref;
|
|
std::string name = e.name_buf[0] ? std::string(e.name_buf) : e.name;
|
|
if (type_ref.empty() || name.empty()) { ++idx; continue; }
|
|
std::string sql_id;
|
|
if (find_existing_entity(db, type_ref, name, &sql_id)) {
|
|
map_id[e.tmp_id] = sql_id;
|
|
++dedup_e;
|
|
} else {
|
|
char id_buf[96];
|
|
std::snprintf(id_buf, sizeof(id_buf), "%s_%lld_%d",
|
|
type_ref.c_str(), now_ms_local(), idx);
|
|
std::string new_id = id_buf;
|
|
if (!insert_entity_with_metadata(db, new_id, name, type_ref,
|
|
e.metadata_json, ts)) {
|
|
sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr);
|
|
sqlite3_close(db);
|
|
return false;
|
|
}
|
|
map_id[e.tmp_id] = new_id;
|
|
++added_e;
|
|
}
|
|
++idx;
|
|
}
|
|
|
|
int added_r = 0, skipped_r = 0;
|
|
int ridx = 0;
|
|
for (const auto& r : result.relations) {
|
|
++ridx;
|
|
if (!r.selected) continue;
|
|
auto it_from = map_id.find(r.from_tmp_id);
|
|
auto it_to = map_id.find(r.to_tmp_id);
|
|
if (it_from == map_id.end() || it_to == map_id.end()) {
|
|
++skipped_r; continue;
|
|
}
|
|
std::string name = r.name.empty() ? std::string("RELATED_TO") : r.name;
|
|
if (relation_exists(db, it_from->second, it_to->second, name)) {
|
|
++skipped_r; continue;
|
|
}
|
|
char id_buf[96];
|
|
std::snprintf(id_buf, sizeof(id_buf), "rel_%lld_%d",
|
|
now_ms_local(), ridx);
|
|
if (!insert_relation_simple(db, id_buf, name,
|
|
it_from->second, it_to->second, ts)) {
|
|
sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr);
|
|
sqlite3_close(db);
|
|
return false;
|
|
}
|
|
++added_r;
|
|
}
|
|
|
|
sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr);
|
|
sqlite3_close(db);
|
|
|
|
if (out_added_entities) *out_added_entities = added_e;
|
|
if (out_dedup_entities) *out_dedup_entities = dedup_e;
|
|
if (out_added_relations) *out_added_relations = added_r;
|
|
if (out_skipped_relations) *out_skipped_relations = skipped_r;
|
|
return true;
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Render (ImGui)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
namespace {
|
|
|
|
/// Starts the extraction on a worker thread unless one is already running.
///
/// The text is copied up front because the UI keeps editing the buffer while
/// the worker runs. When the worker finishes it publishes the result and the
/// status line under `result_mu`, then raises `new_result` and clears `busy`.
void launch_extract_async(ExtractPanelState& s, bool use_hybrid) {
    if (s.busy.load()) {
        return;
    }
    // Reap a previous worker that has finished but was never joined.
    if (s.worker.joinable()) {
        s.worker.join();
    }

    std::string snapshot(s.text_buf.data());
    s.busy.store(true);
    s.status = "Extracting...";
    s.new_result.store(false);

    s.worker = std::thread([&s, text = std::move(snapshot), use_hybrid]() {
        auto fresh = std::make_shared<ExtractResult>();
        const bool ok = extract_panel_run_subprocess(text, use_hybrid, fresh.get());

        char line[128];
        if (ok) {
            std::snprintf(line, sizeof(line),
                          "OK — %zu entities, %zu relations",
                          fresh->entities.size(), fresh->relations.size());
        } else {
            std::snprintf(line, sizeof(line), "ERROR: %s",
                          fresh->error.c_str());
        }
        {
            std::lock_guard<std::mutex> guard(s.result_mu);
            s.result = fresh;
            s.status = line;
        }
        s.new_result.store(true);
        s.busy.store(false);
    });
}
|
|
|
|
} // namespace
|
|
|
|
/// Draws the "Extract" panel: hybrid toggle, Extract button, status line,
/// the paste text area, and the editable tables of proposed entities and
/// relations with the "Apply Selected" action.
///
/// Does nothing when `app.panel_extract` is false. A successful apply sets
/// `app.want_reload`.
///
/// Threading: the worker thread publishes `s.result` and `s.status` under
/// `s.result_mu`, so this function reads and writes `s.status` only while
/// holding that mutex.
void extract_panel_render(AppState& app) {
    if (!app.panel_extract) return;

    ExtractPanelState& s = panel_state();

    if (!ImGui::Begin("Extract", &app.panel_extract)) {
        ImGui::End();
        return;
    }

    // Status is shared with the worker thread: always go through the mutex.
    const auto set_status = [&s](const char* text) {
        std::lock_guard<std::mutex> lk(s.result_mu);
        s.status = text;
    };

    // Top bar: hybrid toggle + Extract button + status.
    ImGui::Checkbox("Use hybrid (GLiNER/GLiREL)", &s.use_hybrid);
    ImGui::SameLine();
    bool busy = s.busy.load();
    ImGui::BeginDisabled(busy || s.text_buf.size() < 2 || s.text_buf[0] == 0);
    if (ImGui::Button(busy ? "Extracting..." : (TI_BOLT " Extract"))) {
        launch_extract_async(s, s.use_hybrid);
    }
    ImGui::EndDisabled();
    ImGui::SameLine();
    {
        std::string status_text;
        {
            std::lock_guard<std::mutex> lk(s.result_mu);
            status_text = s.status;
        }
        ImGui::TextDisabled("%s", status_text.c_str());
    }

    // Multi-line text input. The backing vector is resized through
    // ImGuiInputTextFlags_CallbackResize when the user pastes a long text.
    auto resize_cb = [](ImGuiInputTextCallbackData* data) -> int {
        if (data->EventFlag == ImGuiInputTextFlags_CallbackResize) {
            auto* buf = (std::vector<char>*)data->UserData;
            buf->resize(data->BufTextLen + 1);
            data->Buf = buf->data();
        }
        return 0;
    };

    ImVec2 input_size(-1.0f, ImGui::GetContentRegionAvail().y * 0.45f);
    ImGui::InputTextMultiline("##paste_text",
                              s.text_buf.data(), s.text_buf.size(),
                              input_size,
                              ImGuiInputTextFlags_CallbackResize, resize_cb, &s.text_buf);

    ImGui::Separator();

    // Proposal tables: take a reference to the latest published result.
    std::shared_ptr<ExtractResult> res;
    {
        std::lock_guard<std::mutex> lk(s.result_mu);
        res = s.result;
    }
    if (!res) {
        ImGui::TextDisabled("Pega texto y pulsa Extract para ver propuestas.");
        ImGui::End();
        return;
    }
    if (!res->error.empty()) {
        ImGui::TextColored(ImVec4(0.95f, 0.45f, 0.45f, 1.0f),
                           "Error: %s", res->error.c_str());
        if (!res->stderr_tail.empty()) {
            ImGui::TextWrapped("%s", res->stderr_tail.c_str());
        }
        ImGui::End();
        return;
    }

    if (!res->layers.empty()) {
        std::string layers = "Layers: ";
        for (size_t i = 0; i < res->layers.size(); ++i) {
            if (i) layers += ", ";
            layers += res->layers[i];
        }
        ImGui::TextDisabled("%s", layers.c_str());
    }

    // Toolbar: select all / none.
    if (ImGui::SmallButton("All")) {
        for (auto& e : res->entities) e.selected = true;
        for (auto& r : res->relations) r.selected = true;
    }
    ImGui::SameLine();
    if (ImGui::SmallButton("None")) {
        for (auto& e : res->entities) e.selected = false;
        for (auto& r : res->relations) r.selected = false;
    }
    ImGui::SameLine();
    int sel_e = 0, sel_r = 0;
    for (const auto& e : res->entities) if (e.selected) ++sel_e;
    for (const auto& r : res->relations) if (r.selected) ++sel_r;
    ImGui::TextDisabled("Selected: %d entities, %d relations", sel_e, sel_r);

    // Apply Selected.
    ImGui::SameLine();
    bool can_apply = (sel_e + sel_r) > 0 && !app.input_db_path.empty();
    ImGui::BeginDisabled(!can_apply);
    if (ImGui::Button(TI_CHECK " Apply Selected")) {
        int ae = 0, de = 0, ar = 0, sr = 0;
        bool ok = extract_panel_apply(app.input_db_path.c_str(),
                                      *res, &ae, &de, &ar, &sr);
        if (ok) {
            s.last_apply_entities = ae;
            s.last_apply_relations = ar;
            s.last_apply_dedup = de;
            char buf[160];
            std::snprintf(buf, sizeof(buf),
                          "Applied: +%d entities, +%d relations (%d deduped, %d skipped)",
                          ae, ar, de, sr);
            set_status(buf);
            app.want_reload = true;
        } else {
            set_status("Apply failed");
        }
    }
    ImGui::EndDisabled();
    if (app.input_db_path.empty()) {
        ImGui::SameLine();
        ImGui::TextColored(ImVec4(1.0f, 0.7f, 0.3f, 1.0f),
                           "(no operations.db loaded)");
    }

    // Entities table.
    if (!res->entities.empty() &&
        ImGui::CollapsingHeader("Entities", ImGuiTreeNodeFlags_DefaultOpen)) {
        if (ImGui::BeginTable("##ents", 5,
                              ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg |
                              ImGuiTableFlags_ScrollY,
                              ImVec2(0.0f, 200.0f))) {
            ImGui::TableSetupColumn("", ImGuiTableColumnFlags_WidthFixed, 28.0f);
            ImGui::TableSetupColumn("Type", ImGuiTableColumnFlags_WidthFixed, 120.0f);
            ImGui::TableSetupColumn("Name");
            ImGui::TableSetupColumn("Span", ImGuiTableColumnFlags_WidthFixed, 90.0f);
            ImGui::TableSetupColumn("Src", ImGuiTableColumnFlags_WidthFixed, 60.0f);
            ImGui::TableHeadersRow();

            for (size_t i = 0; i < res->entities.size(); ++i) {
                auto& e = res->entities[i];
                ImGui::TableNextRow();
                ImGui::PushID((int)i);

                ImGui::TableNextColumn();
                ImGui::Checkbox("##sel", &e.selected);

                ImGui::TableNextColumn();
                ImGui::SetNextItemWidth(-1);
                ImGui::InputText("##type", e.type_buf, sizeof(e.type_buf));

                ImGui::TableNextColumn();
                ImGui::SetNextItemWidth(-1);
                ImGui::InputText("##name", e.name_buf, sizeof(e.name_buf));

                ImGui::TableNextColumn();
                if (e.start_offset >= 0) {
                    ImGui::Text("%d-%d", e.start_offset, e.end_offset);
                } else {
                    ImGui::TextDisabled("—");
                }

                ImGui::TableNextColumn();
                ImGui::TextDisabled("%s", e.source.c_str());

                ImGui::PopID();
            }
            ImGui::EndTable();
        }
    }

    // Relations table.
    if (!res->relations.empty() &&
        ImGui::CollapsingHeader("Relations", ImGuiTreeNodeFlags_DefaultOpen)) {
        if (ImGui::BeginTable("##rels", 5,
                              ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg |
                              ImGuiTableFlags_ScrollY,
                              ImVec2(0.0f, 160.0f))) {
            ImGui::TableSetupColumn("", ImGuiTableColumnFlags_WidthFixed, 28.0f);
            ImGui::TableSetupColumn("From", ImGuiTableColumnFlags_WidthFixed, 100.0f);
            ImGui::TableSetupColumn("Name");
            ImGui::TableSetupColumn("To", ImGuiTableColumnFlags_WidthFixed, 100.0f);
            ImGui::TableSetupColumn("Conf", ImGuiTableColumnFlags_WidthFixed, 60.0f);
            ImGui::TableHeadersRow();

            // Lookup helper: temporary id -> (possibly edited) entity name,
            // shortened for the From/To columns. Falls back to the raw id.
            auto entity_label = [&](const std::string& tmp) -> std::string {
                for (const auto& e : res->entities) {
                    if (e.tmp_id == tmp) {
                        std::string n = e.name_buf[0] ? e.name_buf : e.name;
                        if (n.size() > 18) n = n.substr(0, 15) + "...";
                        return n;
                    }
                }
                return tmp;
            };

            for (size_t i = 0; i < res->relations.size(); ++i) {
                auto& r = res->relations[i];
                ImGui::TableNextRow();
                ImGui::PushID(2000 + (int)i);

                ImGui::TableNextColumn();
                ImGui::Checkbox("##sel", &r.selected);

                ImGui::TableNextColumn();
                ImGui::TextUnformatted(entity_label(r.from_tmp_id).c_str());

                ImGui::TableNextColumn();
                ImGui::TextUnformatted(r.name.c_str());

                ImGui::TableNextColumn();
                ImGui::TextUnformatted(entity_label(r.to_tmp_id).c_str());

                ImGui::TableNextColumn();
                ImGui::Text("%.2f", r.confidence);

                ImGui::PopID();
            }
            ImGui::EndTable();
        }
    }

    ImGui::End();
}
|
|
|
|
} // namespace ge
|