chore: auto-commit (11 archivos)

- CMakeLists.txt
- app.md
- main.cpp
- panels.cpp
- appicon.ico
- autoextract_panel.cpp
- picker_state.cpp
- picker_state.h
- py_subprocess.cpp
- py_subprocess.h
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 16:33:25 +02:00
parent fdd607b570
commit 8357774b86
11 changed files with 1626 additions and 20 deletions
+8
View File
@@ -17,6 +17,10 @@ add_imgui_app(navegator_dashboard
cdp_ws.cpp cdp_ws.cpp
network_state.cpp network_state.cpp
session_state.cpp session_state.cpp
picker_state.cpp
py_subprocess.cpp
autoextract_panel.cpp
recipes_panel.cpp
) )
target_include_directories(navegator_dashboard PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(navegator_dashboard PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
@@ -50,6 +54,10 @@ if(FN_BUILD_TESTS)
cdp_ws.cpp cdp_ws.cpp
network_state.cpp network_state.cpp
session_state.cpp session_state.cpp
picker_state.cpp
py_subprocess.cpp
autoextract_panel.cpp
recipes_panel.cpp
tests/navegator_dashboard_tests.cpp tests/navegator_dashboard_tests.cpp
) )
target_include_directories(navegator_dashboard_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(navegator_dashboard_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+15 -1
View File
@@ -3,7 +3,7 @@ name: navegator_dashboard
lang: cpp lang: cpp
domain: tools domain: tools
description: "Cuadro de mandos para gestionar instancias Chrome con remote debugging. Lista navegadores corriendo (visibles + headless), permite lanzar/matar perfiles, inspeccionar pestañas, ejecutar JS, ver peticiones de red. Puente WSL→Windows que centraliza el control que hoy hacemos por scripts dispersos." description: "Cuadro de mandos para gestionar instancias Chrome con remote debugging. Lista navegadores corriendo (visibles + headless), permite lanzar/matar perfiles, inspeccionar pestañas, ejecutar JS, ver peticiones de red. Puente WSL→Windows que centraliza el control que hoy hacemos por scripts dispersos."
tags: [imgui, browser, cdp, dashboard, windows, navegator] tags: [imgui, browser, cdp, dashboard, windows, navegator, auto-extract, recipes, picker]
uses_functions: uses_functions:
- data_table_cpp_viz - data_table_cpp_viz
- viz_render_cpp_viz - viz_render_cpp_viz
@@ -17,6 +17,14 @@ uses_functions:
- compute_column_stats_cpp_core - compute_column_stats_cpp_core
- llm_anthropic_cpp_core - llm_anthropic_cpp_core
- tql_to_sql_cpp_core - tql_to_sql_cpp_core
- claude_cli_prompt_py_infra
- cdp_get_ax_tree_py_pipelines
- llm_propose_scraping_schema_py_infra
- cdp_extract_recipe_py_pipelines
- cdp_open_url_and_wait_py_pipelines
- validate_recipe_yaml_py_core
- infer_json_rows_schema_py_core
- cdp_pick_element_js_js_browser
uses_types: [] uses_types: []
framework: "imgui" framework: "imgui"
entry_point: "main.cpp" entry_point: "main.cpp"
@@ -37,6 +45,12 @@ e2e_checks:
cmd: "curl -sf http://127.0.0.1:19333/browsers" cmd: "curl -sf http://127.0.0.1:19333/browsers"
timeout_s: 5 timeout_s: 5
severity: warning severity: warning
- id: claude_cli_available
cmd: "command -v claude"
severity: warning
- id: python_pipelines_importable
cmd: "python3 -c 'import sys, os; sys.path.insert(0, os.path.join(os.environ[\"FN_REGISTRY_ROOT\"], \"python\", \"functions\", \"pipelines\")); from cdp_extract_recipe import cdp_extract_recipe'"
severity: warning
--- ---
## Proposito ## Proposito
BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

+585
View File
@@ -0,0 +1,585 @@
// autoextract_panel — UI para AutoExtract:
// 1. URL -> Open & Analyze: pipeline Python
// cdp_open_url_and_wait + cdp_get_ax_tree + llm_propose_scraping_schema.
// 2. Editar schema propuesto (selectores + keep checkbox + tipo).
// 3. Test extraction via Runtime.evaluate (JS construido).
// 4. Save as recipe -> YAML en projects/navegator/profiles/default/recipes/.
// + INSERT en data_factory.db (subprocess sqlite3 inline).
//
// La comunicacion con Python es subprocess (py_subprocess.h) — el script Python
// reusa funciones del registry via sys.path injection desde FN_REGISTRY_ROOT.
#include "imgui.h"
#include "core/icons_tabler.h"
#include "core/tokens.h"
#include "session_state.h"
#include "py_subprocess.h"
#include "picker_state.h"
#include "crude_json.h"
#include <algorithm>
#include <atomic>
#include <cerrno>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <mutex>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#else
# include <sys/stat.h>
# include <sys/types.h>
#endif
namespace navegator {
namespace {
// One row of the proposed scraping schema shown in the AutoExtract table.
struct SchemaField {
    // Key under which the extracted value is stored in the result object.
    std::string field;
    // CSS selector handed to document.querySelector by the generated JS.
    std::string selector;
    // Sample value reported by the proposal step; only displayed in the table.
    std::string sample;
    // One of: string|number|bool|url
    std::string type;
    // Rows with keep == false are left out of the generated JS and the saved schema.
    bool keep{true};
};
// State of the AutoExtract panel, shared between the UI code and the detached
// worker threads started from this file. The std::string / std::vector members
// are read and written while holding `mu`; `busy` is an atomic single-flight flag.
struct AutoExtractState {
    std::mutex mu;

    // Text buffers bound directly to ImGui::InputText widgets.
    char url_input[1024] = "https://news.ycombinator.com";
    char recipe_name[256]{};

    // True while a background job (analyze / test extraction) is running.
    std::atomic<bool> busy{false};

    std::string status;             // one-line progress / result message
    std::string last_error;         // last failure message shown in the panel
    std::string raw_python_output;  // raw stdout of the last Python run (diagnostics)

    std::vector<SchemaField> schema;  // proposed schema, editable in the table
    std::string proposed_tab_id;      // tab id returned by the analyze pipeline
    std::string test_output;          // stdout of the last test extraction
};

AutoExtractState g_ax;
// Escapes `s` so it can be placed between double quotes in the generated
// extraction JavaScript.
//
// Backslash, double quote, LF, CR and TAB get their short escapes; any other
// control character below 0x20 is written as \u00XX. Keeping raw control
// characters out of the output matters because the same JS text is later
// embedded in a YAML block scalar, where such characters are not permitted.
// Bytes >= 0x20 (including UTF-8 multi-byte sequences) pass through unchanged.
std::string js_escape(const std::string& s) {
    std::string out;
    out.reserve(s.size() + 4);
    for (const char c : s) {
        switch (c) {
            case '\\': out += "\\\\"; break;
            case '"':  out += "\\\""; break;
            case '\n': out += "\\n";  break;
            case '\r': out += "\\r";  break;
            case '\t': out += "\\t";  break;
            default: {
                const unsigned char byte = static_cast<unsigned char>(c);
                if (byte < 0x20) {
                    char buf[8];
                    std::snprintf(buf, sizeof(buf), "\\u%04x", static_cast<unsigned>(byte));
                    out += buf;
                } else {
                    out += c;
                }
            }
        }
    }
    return out;
}
// Turns a free-form recipe name into a file-name friendly slug:
// ASCII letters/digits are kept (lower-cased), space / '-' / '_' each become
// '-', every other character is dropped. Falls back to "recipe" when nothing
// survives.
std::string slugify(const std::string& s) {
    std::string slug;
    slug.reserve(s.size());
    for (const char raw : s) {
        const unsigned char ch = static_cast<unsigned char>(raw);
        if (std::isalnum(ch)) {
            slug.push_back(static_cast<char>(std::tolower(ch)));
            continue;
        }
        const bool is_separator = (raw == ' ' || raw == '-' || raw == '_');
        if (is_separator) slug.push_back('-');
    }
    if (slug.empty()) slug = "recipe";
    return slug;
}
void run_open_and_analyze(int port, std::string url) {
if (g_ax.busy.exchange(true)) return;
{
std::lock_guard<std::mutex> lk(g_ax.mu);
g_ax.status = "Spawning python pipeline...";
g_ax.last_error.clear();
g_ax.schema.clear();
}
std::thread([port, url]() {
const char* code = R"PY(
import sys, os, json, traceback
root = os.environ.get('FN_REGISTRY_ROOT', '')
if not root:
print(json.dumps({"error": "FN_REGISTRY_ROOT not set"}))
sys.exit(2)
for sub in ('pipelines','core','infra'):
sys.path.insert(0, os.path.join(root, 'python', 'functions', sub))
try:
from cdp_open_url_and_wait import cdp_open_url_and_wait
from cdp_get_ax_tree import cdp_get_ax_tree
from llm_propose_scraping_schema import llm_propose_scraping_schema
url = sys.argv[1]
port = int(sys.argv[2])
tab_id = cdp_open_url_and_wait(port, url, timeout_s=30)
ax = cdp_get_ax_tree(port, tab_id)
schema = llm_propose_scraping_schema(url, ax)
out = {"tab_id": tab_id}
if isinstance(schema, dict):
out.update(schema)
else:
out["fields"] = schema
print(json.dumps(out))
except Exception as e:
print(json.dumps({"error": str(e), "trace": traceback.format_exc()}))
sys.exit(1)
)PY";
std::vector<std::string> argv;
argv.push_back(py_resolve_interpreter());
argv.push_back("-c");
argv.push_back(code);
argv.push_back(url);
argv.push_back(std::to_string(port));
PyResult r = py_run(argv, 120000);
{
std::lock_guard<std::mutex> lk(g_ax.mu);
g_ax.raw_python_output = r.stdout_data;
}
if (r.exit_code != 0 || r.stdout_data.empty()) {
std::lock_guard<std::mutex> lk(g_ax.mu);
g_ax.last_error = r.error.empty() ? "python exited non-zero" : r.error;
g_ax.status = "Failed";
g_ax.busy.store(false);
return;
}
// Parse JSON (puede haber varias lineas — tomamos la ultima no vacia).
std::string json_line;
{
std::stringstream ss(r.stdout_data);
std::string line;
while (std::getline(ss, line)) {
if (!line.empty() && line.front() == '{') json_line = line;
}
}
if (json_line.empty()) {
std::lock_guard<std::mutex> lk(g_ax.mu);
g_ax.last_error = "no JSON object in stdout";
g_ax.status = "Failed";
g_ax.busy.store(false);
return;
}
crude_json::value v = crude_json::value::parse(json_line);
if (!v.is_object()) {
std::lock_guard<std::mutex> lk(g_ax.mu);
g_ax.last_error = "stdout is not a JSON object";
g_ax.status = "Failed";
g_ax.busy.store(false);
return;
}
if (v.contains("error")) {
std::lock_guard<std::mutex> lk(g_ax.mu);
g_ax.last_error = v["error"].is_string() ? v["error"].get<std::string>() : "error";
g_ax.status = "Failed";
g_ax.busy.store(false);
return;
}
std::vector<SchemaField> fields;
std::string tab_id;
if (v.contains("tab_id") && v["tab_id"].is_string()) tab_id = v["tab_id"].get<std::string>();
if (v.contains("fields") && v["fields"].is_array()) {
const auto& arr = v["fields"].get<crude_json::array>();
for (size_t i = 0; i < arr.size(); ++i) {
const auto& f = arr[i];
if (!f.is_object()) continue;
SchemaField sf;
auto getstr = [&](const char* k){
if (!f.contains(k)) return std::string();
const auto& x = f[k];
if (x.is_string()) return x.get<std::string>();
if (x.is_null()) return std::string();
return x.dump();
};
sf.field = getstr("field");
sf.selector = getstr("selector");
sf.sample = getstr("sample_value");
if (sf.sample.empty()) sf.sample = getstr("sample");
sf.type = getstr("type");
if (sf.type.empty()) sf.type = "string";
sf.keep = true;
if (!sf.field.empty()) fields.push_back(std::move(sf));
}
}
{
std::lock_guard<std::mutex> lk(g_ax.mu);
g_ax.schema = std::move(fields);
g_ax.proposed_tab_id = tab_id;
g_ax.status = "Schema proposed (" + std::to_string(g_ax.schema.size()) + " fields)";
}
g_ax.busy.store(false);
}).detach();
}
std::string build_extraction_js(const std::vector<SchemaField>& schema) {
std::ostringstream js;
js << "(function(){var o={};";
for (const auto& f : schema) {
if (!f.keep || f.field.empty() || f.selector.empty()) continue;
// Map type -> coerce.
std::string sel = js_escape(f.selector);
js << "try{var e=document.querySelector(\"" << sel << "\");";
js << "o[\"" << js_escape(f.field) << "\"]=";
if (f.type == "number") {
js << "e?parseFloat((e.innerText||e.textContent||'').replace(/[^0-9.\\-]/g,'')):null;";
} else if (f.type == "url") {
js << "e?(e.href||e.src||null):null;";
} else if (f.type == "bool") {
js << "e?true:false;";
} else {
js << "e?(e.innerText||e.textContent||'').trim():null;";
}
js << "}catch(_){o[\"" << js_escape(f.field) << "\"]=null;}";
}
js << "return o;})()";
return js.str();
}
// Runs the generated extraction JS inside tab `tab_id` of the browser on
// `port`, on a detached worker thread, and stores the script's stdout in
// g_ax.test_output.
//
// Only one job runs at a time: if g_ax.busy is already set the call is a no-op.
// The previous error is cleared when the job starts so that a stale message is
// not displayed next to a later "Test OK".
void run_test_extraction(int port, const std::string& tab_id) {
    if (g_ax.busy.exchange(true)) return;

    std::vector<SchemaField> sc;
    {
        std::lock_guard<std::mutex> lk(g_ax.mu);
        sc = g_ax.schema;
        g_ax.status = "Running extraction in tab...";
        g_ax.last_error.clear();
        g_ax.test_output.clear();
    }
    std::string js = build_extraction_js(sc);

    std::thread([port, tab_id, js]() {
        // Releases the busy flag on every exit path, after the lock below is dropped.
        struct BusyReset {
            ~BusyReset() { g_ax.busy.store(false); }
        } busy_reset;
        (void)busy_reset;

        const char* code = R"PY(
import sys, os, json, traceback
root = os.environ.get('FN_REGISTRY_ROOT', '')
if not root:
    print(json.dumps({"error":"FN_REGISTRY_ROOT not set"})); sys.exit(2)
for sub in ('pipelines','core','infra','browser'):
    sys.path.insert(0, os.path.join(root, 'python', 'functions', sub))
try:
    port = int(sys.argv[1])
    tab_id = sys.argv[2]
    js = sys.argv[3]
    # Best-effort: reuse cdp_runtime_evaluate si existe; sino, hablar CDP directo.
    try:
        from cdp_runtime_evaluate import cdp_runtime_evaluate
        out = cdp_runtime_evaluate(port, tab_id, js, return_by_value=True)
    except Exception:
        import urllib.request, json as _j, websocket
        ws_url = None
        with urllib.request.urlopen(f'http://127.0.0.1:{port}/json') as r:
            for t in _j.loads(r.read()):
                if t.get('id') == tab_id:
                    ws_url = t.get('webSocketDebuggerUrl'); break
        if not ws_url: raise RuntimeError('tab not found')
        w = websocket.create_connection(ws_url, timeout=10)
        w.send(_j.dumps({"id":1,"method":"Runtime.evaluate","params":{
            "expression": js, "returnByValue": True}}))
        out = _j.loads(w.recv()).get('result', {}).get('result', {}).get('value')
        w.close()
    print(json.dumps({"result": out}))
except Exception as e:
    print(json.dumps({"error": str(e), "trace": traceback.format_exc()})); sys.exit(1)
)PY";
        std::vector<std::string> argv;
        argv.push_back(py_resolve_interpreter());
        argv.push_back("-c");
        argv.push_back(code);
        argv.push_back(std::to_string(port));
        argv.push_back(tab_id);
        argv.push_back(js);
        PyResult r = py_run(argv, 30000);
        {
            std::lock_guard<std::mutex> lk(g_ax.mu);
            // stdout is shown in both cases: on failure it carries the
            // script's JSON error and traceback.
            g_ax.test_output = r.stdout_data;
            g_ax.raw_python_output = r.stdout_data;
            if (r.exit_code != 0) {
                g_ax.last_error = r.error.empty() ? std::string("python exited non-zero") : r.error;
                g_ax.status = "Test failed";
            } else {
                g_ax.status = "Test OK";
            }
        }
    }).detach();
}
// Returns `s` as a YAML scalar, adding double quotes whenever a plain
// (unquoted) scalar would be unsafe.
//
// A value is quoted when it:
//   - is empty,
//   - contains ':', '#', '"' or '\'',
//   - starts or ends with a space or tab,
//   - starts with a YAML indicator character (e.g. "[data-x]", "*", "- x",
//     "> a") or with a digit, '.' or '+' (would resolve to a number),
//   - contains a control character (newline, tab, ...),
//   - is a word YAML resolves to a bool/null (true, false, null, yes, no, ...).
// Inside the quotes, '"' and '\\' are backslash-escaped, LF/CR/TAB use their
// short escapes and other control characters are written as \xNN.
// Over-quoting is harmless: a quoted value always loads as the same string.
std::string yaml_quote(const std::string& s) {
    const auto is_control = [](char c) {
        const unsigned char u = static_cast<unsigned char>(c);
        return u < 0x20 || u == 0x7f;
    };
    const auto is_blank = [](char c) { return c == ' ' || c == '\t'; };

    bool needs = s.empty();
    if (!needs) {
        static const char kLeadingIndicators[] = "-?[]{},&*!|>%@`~=<.+";
        const char first = s.front();
        needs = s.find_first_of(":#\"'") != std::string::npos
             || is_blank(first) || is_blank(s.back())
             || (first >= '0' && first <= '9')
             || std::strchr(kLeadingIndicators, first) != nullptr
             || std::any_of(s.begin(), s.end(), is_control);
    }
    if (!needs) {
        // Case-insensitive check against words that do not load as strings.
        std::string lower;
        lower.reserve(s.size());
        for (const char c : s) {
            lower += (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
        }
        for (const char* word : {"true", "false", "null", "yes", "no", "on", "off", "y", "n"}) {
            if (lower == word) { needs = true; break; }
        }
    }
    if (!needs) return s;

    std::string out = "\"";
    for (const char c : s) {
        switch (c) {
            case '"':  out += "\\\""; break;
            case '\\': out += "\\\\"; break;
            case '\n': out += "\\n";  break;
            case '\r': out += "\\r";  break;
            case '\t': out += "\\t";  break;
            default:
                if (is_control(c)) {
                    char buf[8];
                    std::snprintf(buf, sizeof(buf), "\\x%02x",
                                  static_cast<unsigned>(static_cast<unsigned char>(c)));
                    out += buf;
                } else {
                    out += c;
                }
        }
    }
    out += '"';
    return out;
}
// Creates the single directory `path` (no parents). Returns true when the
// directory was created or already exists, false on any other failure.
bool ensure_dir(const std::string& path) {
#ifdef _WIN32
    if (CreateDirectoryA(path.c_str(), nullptr)) return true;
    return GetLastError() == ERROR_ALREADY_EXISTS;
#else
    const int rc = mkdir(path.c_str(), 0755);
    if (rc == 0) return true;
    return errno == EEXIST;
#endif
}
void save_recipe(const std::string& name, const std::string& url) {
std::string root = py_resolve_registry_root();
if (root.empty()) {
std::lock_guard<std::mutex> lk(g_ax.mu);
g_ax.last_error = "FN_REGISTRY_ROOT not set; cannot resolve recipes dir";
g_ax.status = "Save failed";
return;
}
std::string slug = slugify(name);
std::string sep =
#ifdef _WIN32
"\\";
#else
"/";
#endif
std::string dir = root + sep + "projects" + sep + "navegator" + sep + "profiles"
+ sep + "default" + sep + "recipes";
// crear directorios padres uno a uno (best effort).
#ifdef _WIN32
{
std::string acc;
for (size_t i = 0; i < dir.size(); ++i) {
if (dir[i] == '\\' && i > 2) {
acc.assign(dir.begin(), dir.begin() + i);
CreateDirectoryA(acc.c_str(), nullptr);
}
}
CreateDirectoryA(dir.c_str(), nullptr);
}
#else
{
std::string acc;
for (size_t i = 0; i < dir.size(); ++i) {
if (dir[i] == '/' && i > 0) {
acc.assign(dir.begin(), dir.begin() + i);
mkdir(acc.c_str(), 0755);
}
}
mkdir(dir.c_str(), 0755);
}
#endif
std::string path = dir + sep + slug + ".yaml";
std::vector<SchemaField> sc;
std::string url_used = url;
{
std::lock_guard<std::mutex> lk(g_ax.mu);
sc = g_ax.schema;
}
std::string js = build_extraction_js(sc);
std::ostringstream y;
y << "name: " << yaml_quote(slug) << "\n";
y << "url_pattern: " << yaml_quote(url_used) << "\n";
y << "steps:\n";
if (!sc.empty() && !sc.front().selector.empty()) {
y << " - wait_selector: " << yaml_quote(sc.front().selector) << "\n";
}
y << " - js: |\n";
// indent js with 6 spaces
{
std::stringstream ss(js);
std::string line;
while (std::getline(ss, line)) y << " " << line << "\n";
}
y << "output:\n";
y << " schema:\n";
for (const auto& f : sc) {
if (!f.keep) continue;
y << " - name: " << yaml_quote(f.field)
<< " type: " << yaml_quote(f.type)
<< " selector: " << yaml_quote(f.selector) << "\n";
}
y << " sink: data_factory.runs\n";
std::ofstream f(path, std::ios::binary);
if (!f) {
std::lock_guard<std::mutex> lk(g_ax.mu);
g_ax.last_error = "could not write " + path;
g_ax.status = "Save failed";
return;
}
f << y.str();
f.close();
// INSERT en data_factory.db via subprocess sqlite3 (NO bloquea si falla).
std::thread([root, slug]() {
const char* code = R"PY(
import sys, os, sqlite3, traceback
try:
root = sys.argv[1]
name = sys.argv[2]
db_path = os.path.join(root, 'apps', 'data_factory', 'data_factory.db')
if not os.path.exists(db_path):
print("data_factory.db not found at " + db_path); sys.exit(0)
conn = sqlite3.connect(db_path)
cur = conn.cursor()
cur.execute("""INSERT OR IGNORE INTO nodes(id, kind, name, function_id, description,
schedule_cron, enabled, tags_csv, created_at, updated_at)
VALUES(?, 'extractor', ?, 'cdp_extract_recipe_py_pipelines', ?, '',
1, 'navegator,recipe', datetime('now'), datetime('now'))""",
(name, name, "auto-extract recipe " + name))
conn.commit(); conn.close()
print("ok")
except Exception as e:
print(traceback.format_exc())
)PY";
std::vector<std::string> argv;
argv.push_back(py_resolve_interpreter());
argv.push_back("-c");
argv.push_back(code);
argv.push_back(root);
argv.push_back(slug);
(void)py_run(argv, 10000);
}).detach();
{
std::lock_guard<std::mutex> lk(g_ax.mu);
g_ax.status = "Recipe saved: " + path;
}
}
} // anon
// Draws the AutoExtract window for the current frame.
//
// `p_open` is forwarded to ImGui::Begin, so the window's close button can
// clear the caller's visibility flag.
//
// Layout, top to bottom: URL input + "Open & Analyze", status / error lines,
// the editable schema table, "Test extraction", recipe name + "Save as recipe",
// and the output of the last test run. Shared state in g_ax is copied out
// under g_ax.mu before being used by widgets.
void render_autoextract_panel(bool* p_open) {
if (!ImGui::Begin(TI_BOX " AutoExtract", p_open)) {
ImGui::End();
return;
}
// The panel acts on the browser selected in the session state.
int port = 0;
{
std::lock_guard<std::mutex> lk(g_session().mu);
port = g_session().selected_port;
}
if (port <= 0) {
ImGui::TextDisabled("Select a browser in the Browsers panel.");
ImGui::End();
return;
}
ImGui::TextUnformatted("URL:");
ImGui::SameLine();
ImGui::SetNextItemWidth(420);
ImGui::InputText("##ax_url", g_ax.url_input, sizeof(g_ax.url_input));
ImGui::SameLine();
// Sampled once per frame so every BeginDisabled below is paired with its
// EndDisabled even if a button click flips g_ax.busy mid-frame.
bool busy = g_ax.busy.load();
if (busy) ImGui::BeginDisabled();
if (ImGui::Button(TI_PLAYER_PLAY " Open & Analyze")) {
run_open_and_analyze(port, g_ax.url_input);
}
if (busy) ImGui::EndDisabled();
if (busy) {
ImGui::SameLine();
ImGui::TextDisabled("working...");
}
{
std::lock_guard<std::mutex> lk(g_ax.mu);
if (!g_ax.status.empty()) ImGui::Text("Status: %s", g_ax.status.c_str());
if (!g_ax.last_error.empty()) {
ImGui::PushStyleColor(ImGuiCol_Text, fn_tokens::colors::error);
ImGui::TextWrapped("Error: %s", g_ax.last_error.c_str());
ImGui::PopStyleColor();
}
}
ImGui::Separator();
ImGui::TextDisabled("Proposed schema (editable):");
// Edit a private copy of the schema; it is written back under the lock
// only when a widget reported a change this frame.
std::vector<SchemaField> sc_copy;
{
std::lock_guard<std::mutex> lk(g_ax.mu);
sc_copy = g_ax.schema;
}
if (ImGui::BeginTable("##ax_schema", 5, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
ImGui::TableSetupColumn("field");
ImGui::TableSetupColumn("selector");
ImGui::TableSetupColumn("sample");
ImGui::TableSetupColumn("type");
ImGui::TableSetupColumn("keep");
ImGui::TableHeadersRow();
bool dirty = false;
for (size_t i = 0; i < sc_copy.size(); ++i) {
ImGui::TableNextRow();
// Row index as ID scope so the "##field"/"##selector"/... labels can repeat per row.
ImGui::PushID((int)i);
ImGui::TableNextColumn();
// Each text cell is edited through a fixed-size stack buffer
// (128 / 512 / 32 bytes); snprintf truncates longer values to fit.
char fb[128]; std::snprintf(fb, sizeof(fb), "%s", sc_copy[i].field.c_str());
if (ImGui::InputText("##field", fb, sizeof(fb))) { sc_copy[i].field = fb; dirty = true; }
ImGui::TableNextColumn();
char sb[512]; std::snprintf(sb, sizeof(sb), "%s", sc_copy[i].selector.c_str());
if (ImGui::InputText("##selector", sb, sizeof(sb))) { sc_copy[i].selector = sb; dirty = true; }
ImGui::TableNextColumn();
// The sample column is display-only.
ImGui::TextWrapped("%s", sc_copy[i].sample.c_str());
ImGui::TableNextColumn();
char tb[32]; std::snprintf(tb, sizeof(tb), "%s", sc_copy[i].type.c_str());
if (ImGui::InputText("##type", tb, sizeof(tb))) { sc_copy[i].type = tb; dirty = true; }
ImGui::TableNextColumn();
bool keep = sc_copy[i].keep;
if (ImGui::Checkbox("##keep", &keep)) { sc_copy[i].keep = keep; dirty = true; }
ImGui::PopID();
}
ImGui::EndTable();
if (dirty) {
std::lock_guard<std::mutex> lk(g_ax.mu);
g_ax.schema = sc_copy;
}
}
ImGui::Separator();
std::string tab_id;
{
std::lock_guard<std::mutex> lk(g_ax.mu);
tab_id = g_ax.proposed_tab_id;
}
if (busy) ImGui::BeginDisabled();
// The click is ignored until an analyze run has stored a tab id.
if (ImGui::Button(TI_FLASK " Test extraction") && !tab_id.empty()) {
run_test_extraction(port, tab_id);
}
if (busy) ImGui::EndDisabled();
ImGui::SameLine();
ImGui::TextDisabled("Recipe name:");
ImGui::SameLine();
ImGui::SetNextItemWidth(200);
ImGui::InputText("##rname", g_ax.recipe_name, sizeof(g_ax.recipe_name));
ImGui::SameLine();
if (ImGui::Button(TI_DEVICE_FLOPPY " Save as recipe")) {
// Saving needs a non-empty recipe name; the URL currently in the input box is stored with it.
if (g_ax.recipe_name[0]) save_recipe(g_ax.recipe_name, g_ax.url_input);
}
{
std::lock_guard<std::mutex> lk(g_ax.mu);
if (!g_ax.test_output.empty()) {
ImGui::Separator();
ImGui::TextDisabled("Test output:");
// The string's own buffer is handed to ImGui as char*; the widget is
// flagged ReadOnly and the lock is held while it is drawn.
ImGui::InputTextMultiline("##test_out", (char*)g_ax.test_output.c_str(),
g_ax.test_output.size() + 1,
ImVec2(-1, 120), ImGuiInputTextFlags_ReadOnly);
}
}
ImGui::End();
}
} // namespace navegator
+32 -19
View File
@@ -28,6 +28,8 @@ void render_browsers_panel(bool* p_open);
void render_tabs_panel(bool* p_open); void render_tabs_panel(bool* p_open);
void render_tab_detail_panel(bool* p_open); void render_tab_detail_panel(bool* p_open);
void render_network_panel(bool* p_open); void render_network_panel(bool* p_open);
void render_autoextract_panel(bool* p_open);
void render_recipes_panel(bool* p_open);
// ---------- Visibilidad de paneles ----------------------------------------- // ---------- Visibilidad de paneles -----------------------------------------
bool show_browsers = true; bool show_browsers = true;
@@ -35,14 +37,18 @@ bool show_tabs = true;
bool show_tab_detail = false; bool show_tab_detail = false;
bool show_network = false; bool show_network = false;
bool show_agent = false; bool show_agent = false;
bool show_autoextract = false;
bool show_recipes = false;
namespace { namespace {
constexpr fn_ui::PanelToggle k_panels[] = { constexpr fn_ui::PanelToggle k_panels[] = {
{"Browsers", "Ctrl+1", &show_browsers}, {"Browsers", "Ctrl+1", &show_browsers},
{"Tabs", "Ctrl+2", &show_tabs}, {"Tabs", "Ctrl+2", &show_tabs},
{"Tab Detail", "Ctrl+3", &show_tab_detail}, {"Tab Detail", "Ctrl+3", &show_tab_detail},
{"Network", "Ctrl+4", &show_network}, {"Network", "Ctrl+4", &show_network},
{"Agent", "Ctrl+5", &show_agent}, {"Agent", "Ctrl+5", &show_agent},
{"AutoExtract", "Ctrl+6", &show_autoextract},
{"Recipes", "Ctrl+7", &show_recipes},
}; };
} // anon } // anon
@@ -118,14 +124,17 @@ void extra_del(const std::string& name) {
// ---------- API publica para tests + main ---------------------------------- // ---------- API publica para tests + main ----------------------------------
std::string capture_panel_state() { std::string capture_panel_state() {
char buf[256]; char buf[384];
std::snprintf(buf, sizeof(buf), std::snprintf(buf, sizeof(buf),
"{\"browsers\":%d,\"tabs\":%d,\"tab_detail\":%d,\"network\":%d,\"agent\":%d}", "{\"browsers\":%d,\"tabs\":%d,\"tab_detail\":%d,\"network\":%d,\"agent\":%d,"
"\"autoextract\":%d,\"recipes\":%d}",
show_browsers ? 1 : 0, show_browsers ? 1 : 0,
show_tabs ? 1 : 0, show_tabs ? 1 : 0,
show_tab_detail ? 1 : 0, show_tab_detail ? 1 : 0,
show_network ? 1 : 0, show_network ? 1 : 0,
show_agent ? 1 : 0); show_agent ? 1 : 0,
show_autoextract ? 1 : 0,
show_recipes ? 1 : 0);
return buf; return buf;
} }
@@ -138,16 +147,18 @@ void apply_panel_state(const std::string& json) {
if (p >= json.size()) return def; if (p >= json.size()) return def;
return json[p] == '1' || (json.compare(p, 4, "true") == 0); return json[p] == '1' || (json.compare(p, 4, "true") == 0);
}; };
show_browsers = pull("browsers", true); show_browsers = pull("browsers", true);
show_tabs = pull("tabs", true); show_tabs = pull("tabs", true);
show_tab_detail = pull("tab_detail", true); show_tab_detail = pull("tab_detail", true);
show_network = pull("network", true); show_network = pull("network", true);
show_agent = pull("agent", false); show_agent = pull("agent", false);
show_autoextract = pull("autoextract", false);
show_recipes = pull("recipes", false);
} }
void open_all_panels() { void open_all_panels() {
show_browsers = show_tabs = show_tab_detail = show_network = true; show_browsers = show_tabs = show_tab_detail = show_network = true;
// agent es opt-in: ni save/apply ni reset lo abren por defecto. // agent / autoextract / recipes son opt-in: no se reabren con Reset.
} }
void setup_layouts(fn::AppConfig& cfg) { void setup_layouts(fn::AppConfig& cfg) {
@@ -291,11 +302,13 @@ void teardown_layouts() {
// — no llamamos DockSpaceOverViewport aqui para no duplicar. // — no llamamos DockSpaceOverViewport aqui para no duplicar.
void render() { void render() {
using namespace navegator; using namespace navegator;
if (show_browsers) render_browsers_panel(&show_browsers); if (show_browsers) render_browsers_panel(&show_browsers);
if (show_tabs) render_tabs_panel(&show_tabs); if (show_tabs) render_tabs_panel(&show_tabs);
if (show_tab_detail) render_tab_detail_panel(&show_tab_detail); if (show_tab_detail) render_tab_detail_panel(&show_tab_detail);
if (show_network) render_network_panel(&show_network); if (show_network) render_network_panel(&show_network);
if (show_agent) app_agent::chat_render(&show_agent); if (show_agent) app_agent::chat_render(&show_agent);
if (show_autoextract) render_autoextract_panel(&show_autoextract);
if (show_recipes) render_recipes_panel(&show_recipes);
} }
#ifndef FN_TEST_BUILD #ifndef FN_TEST_BUILD
+133
View File
@@ -20,6 +20,8 @@
#include "local_api.h" #include "local_api.h"
#include "cdp_http.h" #include "cdp_http.h"
#include "session_state.h" #include "session_state.h"
#include "picker_state.h"
#include "py_subprocess.h"
#include <algorithm> #include <algorithm>
#include <atomic> #include <atomic>
@@ -27,6 +29,7 @@
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <cstdlib> #include <cstdlib>
#include <fstream>
#include <map> #include <map>
#include <mutex> #include <mutex>
#include <sstream> #include <sstream>
@@ -532,6 +535,56 @@ void render_tab_detail_panel(bool* p_open) {
ImGui::Text("Browser :%d", port); ImGui::Text("Browser :%d", port);
ImGui::Text("Tab id %s", sel_id.c_str()); ImGui::Text("Tab id %s", sel_id.c_str());
ImGui::TextWrapped("WS %s", sel_ws.c_str()); ImGui::TextWrapped("WS %s", sel_ws.c_str());
ImGui::Separator();
// --- Pick element ---
bool active = picker_is_active();
if (active) ImGui::PushStyleColor(ImGuiCol_Button, fn_tokens::colors::primary);
if (ImGui::Button(active ? (TI_FLASK " Picking... (click to stop)")
: (TI_FLASK " Pick element"))) {
if (active) {
picker_stop();
} else {
std::string err = picker_start(port, sel_id, sel_ws);
if (!err.empty()) {
ImGui::PushStyleColor(ImGuiCol_Text, fn_tokens::colors::error);
ImGui::TextWrapped("Pick error: %s", err.c_str());
ImGui::PopStyleColor();
}
}
}
if (active) ImGui::PopStyleColor();
ImGui::SameLine();
ImGui::TextDisabled("(injects functions/browser/cdp_pick_element_js.js via CDP)");
PickedElement last = picker_last();
if (last.valid) {
ImGui::Separator();
ImGui::TextDisabled("Last picked:");
if (ImGui::BeginChild("##picked_card", ImVec2(0, 110), true)) {
ImGui::Text("tag: %s", last.tag.c_str());
ImGui::TextWrapped("selector: %s", last.selector.c_str());
ImGui::TextWrapped("xpath: %s", last.xpath.c_str());
std::string short_text = last.text;
if (short_text.size() > 200) short_text = short_text.substr(0, 200) + "...";
ImGui::TextWrapped("text: %s", short_text.c_str());
}
ImGui::EndChild();
if (ImGui::SmallButton("Copy selector")) {
ImGui::SetClipboardText(last.selector.c_str());
}
ImGui::SameLine();
if (ImGui::SmallButton("Save to recipe (new)")) {
// Placeholder: futura integracion para crear recipe nueva con un
// unico field a partir del selector. Por ahora se copia.
ImGui::SetClipboardText(last.selector.c_str());
}
ImGui::SameLine();
if (ImGui::SmallButton("Clear")) picker_clear_last();
} else {
ImGui::TextDisabled("(no picked element yet — click 'Pick element' and click on the page)");
}
ImGui::Separator(); ImGui::Separator();
ImGui::TextWrapped( ImGui::TextWrapped(
"Tab Detail (HTML preview + screenshot + Runtime.evaluate REPL) llega " "Tab Detail (HTML preview + screenshot + Runtime.evaluate REPL) llega "
@@ -744,8 +797,88 @@ void draw_request_detail(const NetworkRequest& r, NetworkSession* net) {
ImGui::EndTabItem(); ImGui::EndTabItem();
} }
if (ImGui::BeginTabItem("Response")) { if (ImGui::BeginTabItem("Response")) {
// Detect JSON response (content-type: application/json).
bool is_json = false;
for (const auto& h : r.response_headers) {
std::string n = h.name; std::transform(n.begin(), n.end(), n.begin(), ::tolower);
if (n == "content-type" && h.value.find("application/json") != std::string::npos) {
is_json = true; break;
}
}
if (r.body_fetched && !r.body_text.empty()) { if (r.body_fetched && !r.body_text.empty()) {
if (ImGui::SmallButton("Copy")) copy_to_clipboard(r.body_text); if (ImGui::SmallButton("Copy")) copy_to_clipboard(r.body_text);
if (is_json) {
ImGui::SameLine();
if (ImGui::SmallButton(TI_LIST_DETAILS " Parse")) {
// Llama infer_json_rows_schema via subprocess.
static std::string g_parsed; // sticky entre frames
g_parsed.clear();
const char* code = R"PY(
import sys, os, json, traceback
root = os.environ.get('FN_REGISTRY_ROOT', '')
if not root:
print(json.dumps({"error":"FN_REGISTRY_ROOT not set"})); sys.exit(2)
for sub in ('core',):
sys.path.insert(0, os.path.join(root, 'python', 'functions', sub))
try:
from infer_json_rows_schema import infer_json_rows_schema
body = sys.stdin.read()
obj = json.loads(body)
res = infer_json_rows_schema(obj)
print(json.dumps(res if isinstance(res, dict) else {"result": res}))
except Exception as e:
print(json.dumps({"error": str(e), "trace": traceback.format_exc()})); sys.exit(1)
)PY";
std::vector<std::string> argv;
argv.push_back(py_resolve_interpreter());
argv.push_back("-c");
argv.push_back(code);
// Lanza un thread y deja log en g_net_ui.* via clipboard (simple).
std::string body = r.body_text;
std::thread([argv, body]() {
(void)argv; (void)body;
// py_run no soporta stdin todavia; usamos un archivo temporal.
// Para mantener el patch minimo: escribimos body a archivo temp,
// y pasamos su path como argv extra; el script lo lee.
char tmp[256];
std::snprintf(tmp, sizeof(tmp), "%s%snav_body_%lld.json",
#ifdef _WIN32
getenv("TEMP") ? getenv("TEMP") : ".", "\\",
#else
"/tmp", "/",
#endif
(long long)std::time(nullptr));
{
std::ofstream f(tmp, std::ios::binary);
if (f) f.write(body.data(), body.size());
}
const char* code2 = R"PY(
import sys, os, json, traceback
root = os.environ.get('FN_REGISTRY_ROOT', '')
if not root:
print(json.dumps({"error":"FN_REGISTRY_ROOT not set"})); sys.exit(2)
for sub in ('core',):
sys.path.insert(0, os.path.join(root, 'python', 'functions', sub))
try:
from infer_json_rows_schema import infer_json_rows_schema
with open(sys.argv[1], 'rb') as f: body = f.read().decode('utf-8','replace')
obj = json.loads(body)
res = infer_json_rows_schema(obj)
print(json.dumps(res if isinstance(res, dict) else {"result": res}))
except Exception as e:
print(json.dumps({"error": str(e), "trace": traceback.format_exc()})); sys.exit(1)
)PY";
std::vector<std::string> a2 = {
py_resolve_interpreter(), "-c", code2, tmp
};
PyResult pr = py_run(a2, 30000);
ImGui::SetClipboardText(pr.stdout_data.c_str());
std::remove(tmp);
}).detach();
}
ImGui::SameLine();
ImGui::TextDisabled("(result -> clipboard)");
}
ImGui::Separator(); ImGui::Separator();
ImGui::InputTextMultiline("##body", (char*)r.body_text.c_str(), r.body_text.size() + 1, ImGui::InputTextMultiline("##body", (char*)r.body_text.c_str(), r.body_text.size() + 1,
ImVec2(-1, -1), ImGuiInputTextFlags_ReadOnly); ImVec2(-1, -1), ImGuiInputTextFlags_ReadOnly);
+197
View File
@@ -0,0 +1,197 @@
#include "picker_state.h"
#include "cdp_ws.h"
#include "py_subprocess.h" // for py_resolve_registry_root
// crude_json del vendor imgui-node-editor (ya linkado por CMakeLists.txt).
#include "crude_json.h"
#include <atomic>
#include <cstdio>
#include <fstream>
#include <mutex>
#include <sstream>
#include <thread>
namespace navegator {
namespace {
// File-local picker state.
// g_mu is held by pump_loop while it touches g_ws and while it stores g_last.
std::mutex g_mu;
// CDP websocket that pump_loop drains for incoming messages.
// NOTE(review): presumably created/reset by picker_start/picker_stop — not visible here.
std::unique_ptr<CdpWs> g_ws;
// Set to false when pump_loop returns.
// NOTE(review): presumably what picker_is_active() reports — confirm.
std::atomic<bool> g_active{false};
// pump_loop leaves its loop once this becomes true.
std::atomic<bool> g_stop_pump{false};
// NOTE(review): presumably the thread running pump_loop — its start/join is not visible here.
std::thread g_pump_thread;
// Most recent element decoded from a "__fn_picked__" console event.
PickedElement g_last;
// Reads the whole file at `path` in binary mode and returns its contents.
// Returns an empty string when the file cannot be opened.
std::string slurp(const std::string& path) {
    std::ostringstream contents;
    if (std::ifstream in{path, std::ios::binary}) {
        contents << in.rdbuf();
    }
    return contents.str();
}
// Escapes `s` for use inside a double-quoted JavaScript string literal.
// This is NOT JSON encoding: the JSON wrapping happens later, when the CDP
// params are built.
// Backslash, double quote, LF, CR and TAB get short escapes; any other byte
// below 0x20 becomes \u00XX (lower-case hex); everything else is copied as is.
std::string js_str_escape(const std::string& s) {
    static const char kHexDigits[] = "0123456789abcdef";
    std::string escaped;
    escaped.reserve(s.size() + 8);
    for (const char ch : s) {
        const unsigned char byte = static_cast<unsigned char>(ch);
        if (ch == '\\') {
            escaped += "\\\\";
        } else if (ch == '"') {
            escaped += "\\\"";
        } else if (ch == '\n') {
            escaped += "\\n";
        } else if (ch == '\r') {
            escaped += "\\r";
        } else if (ch == '\t') {
            escaped += "\\t";
        } else if (byte < 0x20) {
            escaped += "\\u00";
            escaped += kHexDigits[byte >> 4];
            escaped += kHexDigits[byte & 0x0f];
        } else {
            escaped += ch;
        }
    }
    return escaped;
}
// Body of the pump thread: polls the picker WebSocket for incoming messages and
// publishes the last picked element into g_last.
// Runs until g_stop_pump is set, or until the socket is gone / reports not
// connected; clears g_active on the way out.
void pump_loop() {
while (!g_stop_pump.load()) {
std::vector<std::string> msgs;
{
// Hold the lock only while touching g_ws; parsing happens outside it.
std::lock_guard<std::mutex> lk(g_mu);
if (!g_ws || !g_ws->is_connected()) break;
// NOTE(review): 64 is presumably a per-call message cap — confirm against CdpWs::drain.
msgs = g_ws->drain(64);
}
for (const auto& m : msgs) {
crude_json::value v = crude_json::value::parse(m);
if (!v.is_object()) continue;
// Keep only method == "Runtime.consoleAPICalled" whose args[0].value == "__fn_picked__".
if (!v.contains("method")) continue;
const auto& method = v["method"];
if (!method.is_string()) continue;
if (method.get<std::string>() != "Runtime.consoleAPICalled") continue;
if (!v.contains("params")) continue;
const auto& params = v["params"];
if (!params.is_object() || !params.contains("args")) continue;
const auto& args = params["args"];
if (!args.is_array()) continue;
const auto& args_arr = args.get<crude_json::array>();
// Need both the marker (args[0]) and the payload (args[1]).
if (args_arr.size() < 2) continue;
const auto& a0 = args_arr[0];
if (!a0.is_object() || !a0.contains("value")) continue;
const auto& v0 = a0["value"];
if (!v0.is_string() || v0.get<std::string>() != "__fn_picked__") continue;
const auto& a1 = args_arr[1];
if (!a1.is_object() || !a1.contains("value")) continue;
const auto& v1 = a1["value"];
// args[1].value may be a string (serialized JSON) or an object. The injected
// JS is expected to call console.log("__fn_picked__", JSON.stringify(payload)).
std::string payload;
if (v1.is_string()) payload = v1.get<std::string>();
else payload = v1.dump();
crude_json::value p = crude_json::value::parse(payload);
if (!p.is_object()) continue;
PickedElement el;
// Field reader: missing key or null -> "", string -> its value,
// anything else -> its dump() text.
auto get = [&](const char* k) -> std::string {
if (!p.contains(k)) return "";
const auto& x = p[k];
if (x.is_string()) return x.get<std::string>();
if (x.is_null()) return "";
return x.dump();
};
el.selector = get("selector");
el.xpath = get("xpath");
el.tag = get("tag");
el.text = get("text");
if (p.contains("rect")) {
// Stored as the dumped text of the "rect" value, whatever its shape.
el.rect_str = p["rect"].dump();
}
el.valid = true;
{
// Publish under the lock so picker_last() never sees a half-written element.
std::lock_guard<std::mutex> lk(g_mu);
g_last = std::move(el);
}
}
// Poll interval between drains.
std::this_thread::sleep_for(std::chrono::milliseconds(80));
}
g_active.store(false);
}
} // anon
// Loads the picker script from <registry root>/functions/browser/cdp_pick_element_js.js.
// Returns "" when the registry root cannot be resolved or the file cannot be read.
std::string picker_load_js() {
#ifdef _WIN32
    static const char kScriptRelPath[] = "\\functions\\browser\\cdp_pick_element_js.js";
#else
    static const char kScriptRelPath[] = "/functions/browser/cdp_pick_element_js.js";
#endif
    const std::string registry_root = py_resolve_registry_root();
    if (registry_root.empty()) {
        return "";
    }
    return slurp(registry_root + kScriptRelPath);
}
// Starts pick mode on the target reachable at `ws_url` (the port and tab id
// parameters are currently unused). Any previous picker session is stopped
// first. Returns "" on success, otherwise a short error message.
std::string picker_start(int /*port*/, const std::string& /*tab_id*/, const std::string& ws_url) {
    if (ws_url.empty()) return "no ws_url";

    // Tear down any previous session before doing anything else.
    picker_stop();

    const std::string script = picker_load_js();
    if (script.empty()) return "could not load cdp_pick_element_js.js (set FN_REGISTRY_ROOT)";

    std::string ws_host;
    std::string ws_path;
    int ws_port = 0;
    if (!CdpWs::parse_ws_url(ws_url, ws_host, ws_port, ws_path)) return "invalid ws_url";

    CdpWsConfig cfg;
    cfg.host = ws_host;
    cfg.port = ws_port;
    cfg.path = ws_path;
    cfg.timeout_ms = 5000;

    auto socket = std::make_unique<CdpWs>();
    std::string connect_err;
    if (!socket->connect(cfg, &connect_err)) return "ws connect failed: " + connect_err;

    // Runtime must be enabled for Runtime.consoleAPICalled events to be delivered.
    socket->send_command("Runtime.enable", "");

    // Inject the script through Runtime.evaluate; the script text is the
    // "expression" value, escaped for embedding inside the JSON string.
    std::string eval_params = "{\"expression\":\"";
    eval_params += js_str_escape(script);
    eval_params += "\",\"includeCommandLineAPI\":true";
    eval_params += ",\"awaitPromise\":false";
    eval_params += ",\"returnByValue\":false}";
    socket->send_command("Runtime.evaluate", eval_params);

    {
        std::lock_guard<std::mutex> guard(g_mu);
        g_ws = std::move(socket);
    }
    g_stop_pump.store(false);
    g_active.store(true);
    g_pump_thread = std::thread(pump_loop);
    return "";
}
// Stops pick mode: asks the pump thread to exit, closes the socket, joins the
// thread, then releases the socket and clears the active flag.
// The order matters: the socket is only reset after the pump thread has been
// joined, so the thread never observes a half-destroyed g_ws.
void picker_stop() {
g_stop_pump.store(true);
{
std::lock_guard<std::mutex> lk(g_mu);
if (g_ws) g_ws->close();
}
// Join without holding g_mu; pump_loop() takes the same mutex on each iteration.
if (g_pump_thread.joinable()) g_pump_thread.join();
{
std::lock_guard<std::mutex> lk(g_mu);
g_ws.reset();
}
g_active.store(false);
}
// True between a successful picker_start() and the pump thread exiting / picker_stop().
bool picker_is_active() { return g_active.load(); }
// Returns a copy of the last captured element, taken while holding g_mu.
PickedElement picker_last() {
std::lock_guard<std::mutex> lk(g_mu);
return g_last;
}
// Resets the last captured element to a default-constructed (valid == false) value.
void picker_clear_last() {
std::lock_guard<std::mutex> lk(g_mu);
g_last = PickedElement{};
}
} // namespace navegator
+51
View File
@@ -0,0 +1,51 @@
#pragma once
// picker_state — Pick element: inyecta JS via CDP Runtime.evaluate, escucha
// `Runtime.consoleAPICalled` filtrando args[0].value == "__fn_picked__", y
// publica el ultimo elemento elegido para que Tab Detail panel lo renderice.
//
// Decisiones:
// - WS propio (CdpWs) por panel; no comparte el de NetworkSession para
// no entrelazar Runtime.* con Network.*.
// - El JS payload se lee de ${FN_REGISTRY_ROOT}/functions/browser/cdp_pick_element_js.js
// (path por env, fallback hardcoded relativo al exe).
// - Estado global thread-safe.
#include <atomic>
#include <memory>
#include <mutex>
#include <string>
namespace navegator {
// Description of one element reported by the injected picker script.
// All text fields default to empty; `valid` tells whether anything was captured.
struct PickedElement {
std::string selector; // payload field "selector"
std::string xpath; // payload field "xpath"
std::string tag; // payload field "tag"
std::string text; // payload field "text"
std::string rect_str; // free-form text for the element rectangle (e.g. "x,y,w,h")
bool valid = false; // false until an element has been captured
};
class CdpWs; // forward declaration
// Starts pick mode for the given tab. If a session is already active it is
// restarted. Loads the JS payload from the registry functions path.
// Returns "" on success, an error message otherwise.
std::string picker_start(int port, const std::string& tab_id, const std::string& ws_url);
// Stops pick mode (closes the WebSocket).
void picker_stop();
// Whether pick mode is currently active.
bool picker_is_active();
// Returns a snapshot (copy) of the last captured element.
PickedElement picker_last();
// Clears the last captured element.
void picker_clear_last();
// Loads the JS payload. Returns "" if it could not be read.
std::string picker_load_js();
} // namespace navegator
+239
View File
@@ -0,0 +1,239 @@
#include "py_subprocess.h"
#include "app_base.h"
#include <atomic>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <thread>
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#else
# include <sys/types.h>
# include <sys/wait.h>
# include <unistd.h>
#endif
namespace navegator {
namespace {
// Reports whether `p` names a file that can be opened for reading.
// An empty path is never considered to exist.
bool file_exists(const std::string& p) {
    if (p.empty()) {
        return false;
    }
    if (FILE* handle = std::fopen(p.c_str(), "rb")) {
        std::fclose(handle);
        return true;
    }
    return false;
}
// Returns the value of environment variable `name`, or "" when it is not set.
std::string getenv_str(const char* name) {
    if (const char* value = std::getenv(name)) {
        return std::string(value);
    }
    return std::string();
}
} // anon
// Resolves the registry root directory.
//   1. FN_REGISTRY_ROOT, when set and non-empty.
//   2. Otherwise, the directory four levels above the executable's directory
//      (the app is expected to live in projects/navegator/apps/<app>/).
// Returns "" when the executable directory is unknown, when its path contains
// "Desktop" (an installed copy, from which the root cannot be deduced — the
// user must set FN_REGISTRY_ROOT), or when the path has fewer than four
// separators.
std::string py_resolve_registry_root() {
    const std::string from_env = getenv_str("FN_REGISTRY_ROOT");
    if (!from_env.empty()) return from_env;

    std::string dir = fn::exe_dir();
    if (dir.empty()) return "";
    if (dir.find("Desktop") != std::string::npos) return "";

    // Drop the last four path components, one separator at a time.
    int levels_left = 4;
    while (levels_left-- > 0) {
        const auto cut = dir.find_last_of("/\\");
        if (cut == std::string::npos) return "";
        dir.erase(cut);
    }
    return dir;
}
// Picks the Python interpreter to launch: the registry venv interpreter when
// the registry root resolves and that file exists, otherwise the bare
// interpreter name ("python" on Windows, "python3" elsewhere) left to PATH lookup.
std::string py_resolve_interpreter() {
#ifdef _WIN32
    static const char kVenvRelPath[] = "\\python\\.venv\\Scripts\\python.exe";
    static const char kPathFallback[] = "python"; // rely on PATH (py launcher or python.exe)
#else
    static const char kVenvRelPath[] = "/python/.venv/bin/python3";
    static const char kPathFallback[] = "python3";
#endif
    const std::string registry_root = py_resolve_registry_root();
    if (!registry_root.empty()) {
        const std::string candidate = registry_root + kVenvRelPath;
        if (file_exists(candidate)) {
            return candidate;
        }
    }
    return kPathFallback;
}
// ---------------------------------------------------------------------------
// Windows impl
// ---------------------------------------------------------------------------
#ifdef _WIN32
namespace {
// Quotes one argument for a Windows command line so that the child's C runtime
// (and CommandLineToArgvW) parses it back to exactly `a`.
//
// Rules of that parser, which this function inverts:
//   - backslashes are literal unless they immediately precede a double quote;
//   - 2n backslashes + quote   -> n backslashes, quote toggles quoting;
//   - 2n+1 backslashes + quote -> n backslashes and a literal quote.
// So only backslash runs that precede a quote (an embedded one, or the closing
// quote added here) are doubled. Doubling every backslash, as a naive escaper
// does, would corrupt paths and any code passed via `-c`.
//
// Arguments without whitespace or quotes are returned unchanged; an empty
// argument becomes "".
std::string quote_arg_win(const std::string& a) {
    const bool need_q = a.empty() || a.find_first_of(" \t\n\v\"") != std::string::npos;
    if (!need_q) return a;

    std::string out;
    out.reserve(a.size() + 4);
    out += '"';
    std::size_t pending_backslashes = 0;
    for (const char c : a) {
        if (c == '\\') {
            // Defer: how to emit the run depends on what follows it.
            ++pending_backslashes;
            continue;
        }
        if (c == '"') {
            // Double the run, then add one more backslash to escape the quote.
            out.append(pending_backslashes * 2 + 1, '\\');
        } else {
            // Backslashes not followed by a quote are literal.
            out.append(pending_backslashes, '\\');
        }
        pending_backslashes = 0;
        out += c;
    }
    // A trailing run sits right before the closing quote, so it must be doubled.
    out.append(pending_backslashes * 2, '\\');
    out += '"';
    return out;
}
} // anon
// Windows implementation: runs argv as a child process with stdout and stderr
// redirected into one anonymous pipe, and returns the captured output.
//
// argv       - full command; argv[0] is the program. Each element is quoted
//              with quote_arg_win() and joined with spaces.
// timeout_ms - > 0: the child is terminated after this many milliseconds and
//              PyResult::error is set to "timeout"; <= 0: wait indefinitely.
//
// Returns a PyResult with the process exit code and the combined output.
// On setup failure (empty argv, CreatePipe, CreateProcess) `error` is set and
// exit_code keeps its default.
PyResult py_run(const std::vector<std::string>& argv, int timeout_ms) {
    PyResult res;
    if (argv.empty()) { res.error = "argv empty"; return res; }

    std::string cmd;
    for (size_t i = 0; i < argv.size(); ++i) {
        if (i) cmd += ' ';
        cmd += quote_arg_win(argv[i]);
    }

    HANDLE r_pipe = nullptr;
    HANDLE w_pipe = nullptr;
    SECURITY_ATTRIBUTES sa{};
    sa.nLength = sizeof(sa);
    sa.bInheritHandle = TRUE;
    if (!CreatePipe(&r_pipe, &w_pipe, &sa, 0)) {
        res.error = "CreatePipe failed";
        return res;
    }
    // Only the write end may be inherited by the child.
    SetHandleInformation(r_pipe, HANDLE_FLAG_INHERIT, 0);

    STARTUPINFOA si{};
    si.cb = sizeof(si);
    si.dwFlags = STARTF_USESTDHANDLES | STARTF_USESHOWWINDOW;
    si.wShowWindow = SW_HIDE;
    si.hStdOutput = w_pipe;
    si.hStdError = w_pipe;
    si.hStdInput = GetStdHandle(STD_INPUT_HANDLE);

    PROCESS_INFORMATION pi{};
    std::string mutable_cmd = cmd; // CreateProcessA may modify the buffer
    BOOL ok = CreateProcessA(
        nullptr, mutable_cmd.data(),
        nullptr, nullptr, TRUE,
        CREATE_NO_WINDOW,
        nullptr, nullptr, &si, &pi);
    // Capture the error code before any other API call can overwrite it.
    const DWORD create_err = ok ? 0 : GetLastError();
    // Drop our copy of the write end so the reader sees EOF once the child exits.
    CloseHandle(w_pipe);
    if (!ok) {
        CloseHandle(r_pipe);
        char buf[128];
        std::snprintf(buf, sizeof(buf), "CreateProcess failed err=%lu cmd=%s",
                      (unsigned long)create_err, cmd.c_str());
        res.error = buf;
        return res;
    }

    // A reader thread drains the pipe while this thread waits on the process,
    // so a chatty child can never block on a full pipe.
    std::atomic<bool> done{false};
    std::string out;
    std::thread reader([&]() {
        char buf[4096];
        DWORD n = 0;
        while (ReadFile(r_pipe, buf, sizeof(buf), &n, nullptr) && n > 0) {
            out.append(buf, n);
        }
        done.store(true);
    });

    DWORD waited = WaitForSingleObject(pi.hProcess,
                                       timeout_ms > 0 ? (DWORD)timeout_ms : INFINITE);
    if (waited == WAIT_TIMEOUT) {
        TerminateProcess(pi.hProcess, 1);
        // Termination is asynchronous; wait (bounded) so the exit code read
        // below is the final one rather than STILL_ACTIVE.
        WaitForSingleObject(pi.hProcess, 5000);
        res.error = "timeout";
    }
    DWORD exit_code = 0;
    GetExitCodeProcess(pi.hProcess, &exit_code);
    CloseHandle(pi.hProcess);
    CloseHandle(pi.hThread);

    // Give the reader time to consume what is still buffered in the pipe.
    // Closing the read end right away would discard the tail of the output.
    // The wait is bounded because another process that inherited the write end
    // could keep the pipe open indefinitely.
    for (int i = 0; i < 200 && !done.load(); ++i) {
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
    // Normally the reader is finished by now and this is a plain cleanup.
    // Otherwise closing our end is the last-resort attempt to unblock it.
    CloseHandle(r_pipe);
    if (reader.joinable()) reader.join();

    res.exit_code = (int)exit_code;
    res.stdout_data = std::move(out);
    return res;
}
#else // POSIX
// POSIX implementation: runs argv through popen() with stderr merged into
// stdout and returns the captured output.
//
// argv       - full command; argv[0] is the program. Each element is wrapped in
//              single quotes (embedded single quotes become '\'') for the shell.
// timeout_ms - NOT enforced here: popen() offers no way to bound the wait, so
//              the call blocks until the child closes its output.
//
// Returns a PyResult with the exit status (or -1 when the child did not exit
// normally) and the combined output. `error` is set when argv is empty or when
// popen()/pclose() fail.
PyResult py_run(const std::vector<std::string>& argv, int timeout_ms) {
    PyResult res;
    if (argv.empty()) { res.error = "argv empty"; return res; }

    // Build "cmd args..." for popen; minimal shell escaping via single quotes.
    std::string cmd;
    for (size_t i = 0; i < argv.size(); ++i) {
        if (i) cmd += ' ';
        cmd += "'";
        for (char c : argv[i]) {
            if (c == '\'') cmd += "'\\''";
            else cmd += c;
        }
        cmd += "'";
    }
    cmd += " 2>&1";
    (void)timeout_ms;

    FILE* pipe = popen(cmd.c_str(), "r");
    if (!pipe) { res.error = "popen failed"; return res; }

    // Read raw bytes with an explicit length: a line-based read followed by a
    // C-string append would silently drop everything after an embedded NUL.
    std::string out;
    char buf[4096];
    size_t got = 0;
    while ((got = std::fread(buf, 1, sizeof(buf), pipe)) > 0) {
        out.append(buf, got);
    }

    int rc = pclose(pipe);
    if (rc == -1) {
        res.error = "pclose failed";
        res.exit_code = -1;
    } else {
        res.exit_code = WIFEXITED(rc) ? WEXITSTATUS(rc) : -1;
    }
    res.stdout_data = std::move(out);
    return res;
}
#endif
// Runs `code` as an inline script: <interpreter> -c <code> <extra_args...>.
// The interpreter comes from py_resolve_interpreter(); the result is whatever
// py_run() returns for that command line.
PyResult py_run_inline(const std::string& code, const std::vector<std::string>& extra_args,
                       int timeout_ms) {
    std::vector<std::string> command;
    command.push_back(py_resolve_interpreter());
    command.push_back("-c");
    command.push_back(code);
    command.insert(command.end(), extra_args.begin(), extra_args.end());
    return py_run(command, timeout_ms);
}
void py_run_async(const std::vector<std::string>& argv, int timeout_ms,
std::function<void(PyResult)> on_done) {
std::thread([argv, timeout_ms, cb = std::move(on_done)]() {
PyResult r = py_run(argv, timeout_ms);
if (cb) cb(std::move(r));
}).detach();
}
} // namespace navegator
+53
View File
@@ -0,0 +1,53 @@
#pragma once
// py_subprocess — spawn Python with arguments and capture its stdout. Used by the
// AutoExtract and Recipes panels to invoke registry functions (cdp_open_url_and_wait,
// cdp_get_ax_tree, llm_propose_scraping_schema, cdp_extract_recipe, infer_json_rows_schema).
//
// Design decisions:
// - Inline script: the Python code is passed via -c "<inline>" to avoid temporary files.
// - Interpreter: the registry venv is preferred —
//   ${FN_REGISTRY_ROOT}/python/.venv/Scripts/python.exe (Windows venv layout) or
//   ${FN_REGISTRY_ROOT}/python/.venv/bin/python3 (POSIX). If that file does not exist,
//   fall back to "python" (Windows) or "python3" (POSIX) resolved through PATH.
// - Stdout: captured in full. The caller parses the JSON.
// - Stderr: redirected into stdout to ease diagnosis (logs stay visible).
// - No visible console on Windows (CREATE_NO_WINDOW).
// - Optional async wrapper: run on a thread and publish the result via callback.
#include <functional>
#include <string>
#include <vector>
namespace navegator {
// Outcome of one subprocess run.
struct PyResult {
int exit_code = -1; // process exit code; stays -1 when the process never ran
std::string stdout_data; // captured output (stderr is merged into it)
std::string error; // launcher-side message (e.g. CreateProcess/popen failure, "timeout"); empty otherwise
};
// Returns the Python interpreter to use:
//   - the registry venv interpreter when it exists:
//       ${registry root}/python/.venv/Scripts/python.exe (Windows builds)
//       ${registry root}/python/.venv/bin/python3        (other builds)
//   - otherwise the bare name resolved through PATH:
//       "python" on Windows builds, "python3" elsewhere.
std::string py_resolve_interpreter();
// Returns FN_REGISTRY_ROOT. When it is not set, tries to deduce it by going up
// 4 levels from the executable directory, which is expected to be
// ".../fn_registry/projects/navegator/apps/<app>". Returns "" when it cannot
// be deduced.
std::string py_resolve_registry_root();
// Launches python with argv. argv[0] MUST be the interpreter (from py_resolve_interpreter()).
// Inherits the environment. Timeout in ms (0 = no timeout). Returns a PyResult
// with stdout + exit code.
PyResult py_run(const std::vector<std::string>& argv, int timeout_ms = 60000);
// Helper: runs an inline script via `python -c "<code>"` with extra args.
PyResult py_run_inline(const std::string& code, const std::vector<std::string>& extra_args,
int timeout_ms = 60000);
// Async: runs on a thread and calls on_done on that worker thread.
// Arguments are captured by value; the caller must synchronize any shared state
// touched from on_done.
void py_run_async(const std::vector<std::string>& argv, int timeout_ms,
std::function<void(PyResult)> on_done);
} // namespace navegator
+313
View File
@@ -0,0 +1,313 @@
// recipes_panel — Listado de recetas (YAML) en
// projects/navegator/profiles/default/recipes/*.yaml.
//
// Acciones por fila:
// Run -> subprocess Python con cdp_extract_recipe (record_run=True).
// Edit -> abre InputTextMultiline con el YAML; "Save" reescribe.
// Delete -> rm + refresh list.
// Open in data_factory -> noop (placeholder; mostraria link/cmd).
#include "imgui.h"
#include "core/icons_tabler.h"
#include "core/tokens.h"
#include "py_subprocess.h"
#include "session_state.h"
#include <algorithm>
#include <atomic>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <mutex>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#else
# include <dirent.h>
# include <sys/stat.h>
#endif
namespace navegator {
namespace {
// One recipe file as shown in the table.
struct RecipeRow {
std::string name; // top-level `name:` from the YAML, or the file's base name without extension
std::string url_pattern; // top-level `url_pattern:` from the YAML (may be empty)
std::string yaml_path; // full path of the recipe file
std::string last_run_status; // not populated yet by this file (shown as "-")
std::string last_run_at; // not populated yet by this file (shown as "-")
int rows_last_run = 0; // not populated yet by this file
};
// Panel state shared between the UI code and the background run thread.
// `mu` is taken around accesses to rows/status/last_error/editing_idx/edit_buf.
struct RecipesState {
std::mutex mu;
std::vector<RecipeRow> rows; // current listing
std::string status; // one-line status shown next to Refresh
std::string last_error; // last error message (rendered in the error color)
std::atomic<bool> busy{false}; // true while a recipe run is in flight
int editing_idx = -1; // row index being edited, -1 = editor closed
std::string edit_buf; // file contents as loaded when Edit was pressed
char edit_textarea[16384] = {0}; // fixed-size buffer backing the multiline editor
};
// Single panel-wide instance.
RecipesState g_rs;
// Directory holding the recipe YAML files:
// <registry root>/projects/navegator/profiles/default/recipes.
// Returns "" when the registry root cannot be resolved.
std::string recipes_dir() {
#ifdef _WIN32
    static const char kRecipesRelPath[] = "\\projects\\navegator\\profiles\\default\\recipes";
#else
    static const char kRecipesRelPath[] = "/projects/navegator/profiles/default/recipes";
#endif
    const std::string registry_root = py_resolve_registry_root();
    if (registry_root.empty()) {
        return "";
    }
    return registry_root + kRecipesRelPath;
}
// Returns the full contents of the file at `path` (binary mode), or "" when
// the file cannot be opened.
std::string slurp(const std::string& path) {
    std::ifstream source(path, std::ios::binary);
    if (source.fail()) return "";

    std::ostringstream collected;
    collected << source.rdbuf();
    return collected.str();
}
// Minimal, recipe-specific YAML reader: extracts only the top-level `name:` and
// `url_pattern:` scalars from `body` into `r`. Other fields of `r` are untouched.
//
// Only lines that start with the key at column 0 are considered (indented /
// nested keys are ignored). When a key appears more than once the last
// occurrence wins. Values are trimmed of surrounding blanks; a pair of matching
// surrounding quotes (" or ') is removed. A value that merely starts with a
// quote but has no matching closing quote is kept as written rather than
// losing its last character.
void parse_recipe_min(const std::string& body, RecipeRow& r) {
    // Trims leading spaces/tabs and trailing spaces/tabs/CR.
    const auto trim = [](const std::string& s) -> std::string {
        const size_t first = s.find_first_not_of(" \t");
        if (first == std::string::npos) return std::string();
        const size_t last = s.find_last_not_of(" \t\r");
        return s.substr(first, last - first + 1);
    };
    // Removes one pair of matching surrounding quotes, if present.
    const auto unquote = [](const std::string& v) -> std::string {
        const bool quoted = v.size() >= 2 &&
                            (v.front() == '"' || v.front() == '\'') &&
                            v.back() == v.front();
        return quoted ? v.substr(1, v.size() - 2) : v;
    };

    static const std::string kNameKey = "name:";
    static const std::string kUrlKey = "url_pattern:";

    std::stringstream ss(body);
    std::string line;
    while (std::getline(ss, line)) {
        if (line.rfind(kNameKey, 0) == 0) {
            r.name = unquote(trim(line.substr(kNameKey.size())));
        } else if (line.rfind(kUrlKey, 0) == 0) {
            r.url_pattern = unquote(trim(line.substr(kUrlKey.size())));
        }
    }
}
// Lists the *.yaml entries directly inside `dir` as full paths, sorted
// lexicographically. Returns an empty vector when the directory cannot be read.
std::vector<std::string> list_yaml_files(const std::string& dir) {
    std::vector<std::string> found;
#ifdef _WIN32
    const std::string glob = dir + "\\*.yaml";
    WIN32_FIND_DATAA entry;
    HANDLE search = FindFirstFileA(glob.c_str(), &entry);
    if (search == INVALID_HANDLE_VALUE) return found;
    for (;;) {
        found.push_back(dir + "\\" + entry.cFileName);
        if (!FindNextFileA(search, &entry)) break;
    }
    FindClose(search);
#else
    static const std::string kSuffix = ".yaml";
    DIR* handle = opendir(dir.c_str());
    if (handle == nullptr) return found;
    for (auto* entry = readdir(handle); entry != nullptr; entry = readdir(handle)) {
        const std::string file_name = entry->d_name;
        // Require at least one character before the suffix.
        if (file_name.size() <= kSuffix.size()) continue;
        if (file_name.compare(file_name.size() - kSuffix.size(), kSuffix.size(), kSuffix) != 0) continue;
        found.push_back(dir + "/" + file_name);
    }
    closedir(handle);
#endif
    std::sort(found.begin(), found.end());
    return found;
}
void refresh_list() {
std::string dir = recipes_dir();
if (dir.empty()) {
std::lock_guard<std::mutex> lk(g_rs.mu);
g_rs.last_error = "FN_REGISTRY_ROOT not set";
return;
}
auto files = list_yaml_files(dir);
std::vector<RecipeRow> rows;
for (const auto& f : files) {
RecipeRow r; r.yaml_path = f;
std::string body = slurp(f);
parse_recipe_min(body, r);
if (r.name.empty()) {
// fallback al basename sin ext
size_t p1 = f.find_last_of("/\\");
std::string base = (p1 == std::string::npos) ? f : f.substr(p1 + 1);
if (base.size() > 5) base = base.substr(0, base.size() - 5);
r.name = base;
}
rows.push_back(std::move(r));
}
// Anota last_run_* desde data_factory.runs (subprocess sqlite3 best-effort).
// Lo dejamos como TODO — la primera version queda con campos vacios.
std::lock_guard<std::mutex> lk(g_rs.mu);
g_rs.rows = std::move(rows);
g_rs.last_error.clear();
g_rs.status = "Listed " + std::to_string(g_rs.rows.size()) + " recipes";
}
void run_recipe_async(const std::string& yaml_path) {
if (g_rs.busy.exchange(true)) return;
{
std::lock_guard<std::mutex> lk(g_rs.mu);
g_rs.status = "Running " + yaml_path;
}
std::thread([yaml_path]() {
const char* code = R"PY(
import sys, os, json, traceback
root = os.environ.get('FN_REGISTRY_ROOT', '')
if not root:
print(json.dumps({"error":"FN_REGISTRY_ROOT not set"})); sys.exit(2)
for sub in ('pipelines','core','infra'):
sys.path.insert(0, os.path.join(root, 'python', 'functions', sub))
try:
from cdp_extract_recipe import cdp_extract_recipe
path = sys.argv[1]
res = cdp_extract_recipe(path, debug_port=9222, record_run=True)
print(json.dumps(res if isinstance(res, dict) else {"result": res}))
except Exception as e:
print(json.dumps({"error": str(e), "trace": traceback.format_exc()})); sys.exit(1)
)PY";
std::vector<std::string> argv;
argv.push_back(py_resolve_interpreter());
argv.push_back("-c");
argv.push_back(code);
argv.push_back(yaml_path);
PyResult r = py_run(argv, 120000);
{
std::lock_guard<std::mutex> lk(g_rs.mu);
if (r.exit_code != 0) {
g_rs.last_error = r.error.empty() ? "python exited non-zero" : r.error;
g_rs.status = "Run failed";
} else {
g_rs.status = "Run OK: " + r.stdout_data.substr(0, 200);
}
}
g_rs.busy.store(false);
refresh_list();
}).detach();
}
void delete_recipe(const std::string& path) {
std::remove(path.c_str());
refresh_list();
}
} // anon
void render_recipes_panel(bool* p_open) {
if (!ImGui::Begin(TI_LIST_DETAILS " Recipes", p_open)) {
ImGui::End();
return;
}
if (ImGui::Button(TI_REFRESH " Refresh")) refresh_list();
ImGui::SameLine();
{
std::lock_guard<std::mutex> lk(g_rs.mu);
if (!g_rs.status.empty()) ImGui::Text("%s", g_rs.status.c_str());
if (!g_rs.last_error.empty()) {
ImGui::PushStyleColor(ImGuiCol_Text, fn_tokens::colors::error);
ImGui::TextWrapped("Error: %s", g_rs.last_error.c_str());
ImGui::PopStyleColor();
}
}
ImGui::Separator();
std::vector<RecipeRow> rows_copy;
int editing_idx = -1;
{
std::lock_guard<std::mutex> lk(g_rs.mu);
rows_copy = g_rs.rows;
editing_idx = g_rs.editing_idx;
}
if (rows_copy.empty()) {
ImGui::TextDisabled("No recipes in projects/navegator/profiles/default/recipes/.");
ImGui::TextDisabled("Use AutoExtract panel to create one.");
} else if (ImGui::BeginTable("##recipes_tbl", 6,
ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
ImGui::TableSetupColumn("name");
ImGui::TableSetupColumn("url_pattern");
ImGui::TableSetupColumn("last_status");
ImGui::TableSetupColumn("last_at");
ImGui::TableSetupColumn("rows");
ImGui::TableSetupColumn("actions");
ImGui::TableHeadersRow();
for (size_t i = 0; i < rows_copy.size(); ++i) {
const RecipeRow& r = rows_copy[i];
ImGui::TableNextRow();
ImGui::PushID((int)i);
ImGui::TableNextColumn();
ImGui::TextUnformatted(r.name.c_str());
ImGui::TableNextColumn();
ImGui::TextWrapped("%s", r.url_pattern.c_str());
ImGui::TableNextColumn();
ImGui::TextUnformatted(r.last_run_status.empty() ? "-" : r.last_run_status.c_str());
ImGui::TableNextColumn();
ImGui::TextUnformatted(r.last_run_at.empty() ? "-" : r.last_run_at.c_str());
ImGui::TableNextColumn();
ImGui::Text("%d", r.rows_last_run);
ImGui::TableNextColumn();
if (ImGui::SmallButton("Run")) run_recipe_async(r.yaml_path);
ImGui::SameLine();
if (ImGui::SmallButton("Edit")) {
std::string body = slurp(r.yaml_path);
std::lock_guard<std::mutex> lk(g_rs.mu);
g_rs.editing_idx = (int)i;
g_rs.edit_buf = body;
std::snprintf(g_rs.edit_textarea, sizeof(g_rs.edit_textarea),
"%s", body.c_str());
}
ImGui::SameLine();
if (ImGui::SmallButton("Delete")) delete_recipe(r.yaml_path);
ImGui::SameLine();
if (ImGui::SmallButton("Open in data_factory")) {
// placeholder — solo loguea
std::lock_guard<std::mutex> lk(g_rs.mu);
g_rs.status = "open in data_factory: " + r.name + " (not wired)";
}
ImGui::PopID();
}
ImGui::EndTable();
}
if (editing_idx >= 0 && editing_idx < (int)rows_copy.size()) {
ImGui::Separator();
ImGui::Text("Editing: %s", rows_copy[editing_idx].yaml_path.c_str());
ImGui::InputTextMultiline("##rec_edit", g_rs.edit_textarea,
sizeof(g_rs.edit_textarea),
ImVec2(-1, 220));
if (ImGui::Button(TI_DEVICE_FLOPPY " Save")) {
std::ofstream f(rows_copy[editing_idx].yaml_path, std::ios::binary);
if (f) {
f << g_rs.edit_textarea;
f.close();
std::lock_guard<std::mutex> lk(g_rs.mu);
g_rs.status = "Saved " + rows_copy[editing_idx].yaml_path;
g_rs.editing_idx = -1;
}
refresh_list();
}
ImGui::SameLine();
if (ImGui::Button("Cancel")) {
std::lock_guard<std::mutex> lk(g_rs.mu);
g_rs.editing_idx = -1;
}
}
ImGui::End();
}
} // namespace navegator