diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d59af3..602225a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,10 @@ add_imgui_app(navegator_dashboard cdp_ws.cpp network_state.cpp session_state.cpp + picker_state.cpp + py_subprocess.cpp + autoextract_panel.cpp + recipes_panel.cpp ) target_include_directories(navegator_dashboard PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) @@ -50,6 +54,10 @@ if(FN_BUILD_TESTS) cdp_ws.cpp network_state.cpp session_state.cpp + picker_state.cpp + py_subprocess.cpp + autoextract_panel.cpp + recipes_panel.cpp tests/navegator_dashboard_tests.cpp ) target_include_directories(navegator_dashboard_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/app.md b/app.md index 851ab2b..ce0e9a5 100644 --- a/app.md +++ b/app.md @@ -3,7 +3,7 @@ name: navegator_dashboard lang: cpp domain: tools description: "Cuadro de mandos para gestionar instancias Chrome con remote debugging. Lista navegadores corriendo (visibles + headless), permite lanzar/matar perfiles, inspeccionar pestañas, ejecutar JS, ver peticiones de red. Puente WSL→Windows que centraliza el control que hoy hacemos por scripts dispersos." 
-tags: [imgui, browser, cdp, dashboard, windows, navegator] +tags: [imgui, browser, cdp, dashboard, windows, navegator, auto-extract, recipes, picker] uses_functions: - data_table_cpp_viz - viz_render_cpp_viz @@ -17,6 +17,14 @@ uses_functions: - compute_column_stats_cpp_core - llm_anthropic_cpp_core - tql_to_sql_cpp_core + - claude_cli_prompt_py_infra + - cdp_get_ax_tree_py_pipelines + - llm_propose_scraping_schema_py_infra + - cdp_extract_recipe_py_pipelines + - cdp_open_url_and_wait_py_pipelines + - validate_recipe_yaml_py_core + - infer_json_rows_schema_py_core + - cdp_pick_element_js_js_browser uses_types: [] framework: "imgui" entry_point: "main.cpp" @@ -37,6 +45,12 @@ e2e_checks: cmd: "curl -sf http://127.0.0.1:19333/browsers" timeout_s: 5 severity: warning + - id: claude_cli_available + cmd: "command -v claude" + severity: warning + - id: python_pipelines_importable + cmd: "python3 -c 'import sys, os; sys.path.insert(0, os.path.join(os.environ[\"FN_REGISTRY_ROOT\"], \"python\", \"functions\", \"pipelines\")); from cdp_extract_recipe import cdp_extract_recipe'" + severity: warning --- ## Proposito diff --git a/appicon.ico b/appicon.ico new file mode 100644 index 0000000..c0ca60c Binary files /dev/null and b/appicon.ico differ diff --git a/autoextract_panel.cpp b/autoextract_panel.cpp new file mode 100644 index 0000000..7cd33b7 --- /dev/null +++ b/autoextract_panel.cpp @@ -0,0 +1,585 @@ +// autoextract_panel — UI para AutoExtract: +// 1. URL -> Open & Analyze: pipeline Python +// cdp_open_url_and_wait + cdp_get_ax_tree + llm_propose_scraping_schema. +// 2. Editar schema propuesto (selectores + keep checkbox + tipo). +// 3. Test extraction via Runtime.evaluate (JS construido). +// 4. Save as recipe -> YAML en projects/navegator/profiles/default/recipes/. +// + INSERT en data_factory.db (subprocess sqlite3 inline). 
+//
+// Communication with Python goes through a subprocess (py_subprocess.h): the Python
+// script reuses registry functions via sys.path injection from FN_REGISTRY_ROOT.
+
+#include "imgui.h"
+#include "core/icons_tabler.h"
+#include "core/tokens.h"
+
+#include "session_state.h"
+#include "py_subprocess.h"
+#include "picker_state.h"
+
+#include "crude_json.h"
+
+// NOTE(review): the targets of the bare #include lines below (and the template
+// arguments further down) are missing in this copy of the patch — confirm
+// against the real file before applying.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _WIN32
+# define WIN32_LEAN_AND_MEAN
+# include
+#else
+# include
+# include
+#endif
+
+namespace navegator {
+
+namespace {
+
+// One row of the proposed extraction schema, as shown in the editable table.
+struct SchemaField {
+    std::string field;      // output key for the extracted value
+    std::string selector;   // passed to document.querySelector by the generated JS
+    std::string sample;     // sample value reported by the proposal (display only)
+    std::string type; // string|number|bool|url
+    bool keep = true;       // unchecked rows are left out of the JS and the recipe
+};
+
+// All state of the AutoExtract panel. `mu` guards status, last_error,
+// raw_python_output, schema, proposed_tab_id and test_output, which are shared
+// with the detached worker threads. url_input and recipe_name are only touched
+// from the render function; `busy` is the atomic "a worker is running" flag.
+struct AutoExtractState {
+    std::mutex mu;
+    char url_input[1024] = "https://news.ycombinator.com";
+    char recipe_name[256] = "";
+    std::atomic busy{false};
+    std::string status;
+    std::string last_error;
+    std::string raw_python_output; // diagnostics
+    std::vector schema;
+    std::string proposed_tab_id;
+    std::string test_output;
+};
+
+// Single panel-wide instance.
+AutoExtractState g_ax;
+
+// Escapes a selector so it can be serialized into the extraction JS.
+std::string js_escape(const std::string& s) { + std::string out; out.reserve(s.size() + 4); + for (char c : s) { + switch (c) { + case '\\': out += "\\\\"; break; + case '"': out += "\\\""; break; + case '\n': out += "\\n"; break; + case '\r': out += "\\r"; break; + default: out += c; + } + } + return out; +} + +std::string slugify(const std::string& s) { + std::string out; out.reserve(s.size()); + for (char c : s) { + if (std::isalnum((unsigned char)c)) out += (char)std::tolower((unsigned char)c); + else if (c == ' ' || c == '-' || c == '_') out += '-'; + } + if (out.empty()) out = "recipe"; + return out; +} + +void run_open_and_analyze(int port, std::string url) { + if (g_ax.busy.exchange(true)) return; + { + std::lock_guard lk(g_ax.mu); + g_ax.status = "Spawning python pipeline..."; + g_ax.last_error.clear(); + g_ax.schema.clear(); + } + + std::thread([port, url]() { + const char* code = R"PY( +import sys, os, json, traceback +root = os.environ.get('FN_REGISTRY_ROOT', '') +if not root: + print(json.dumps({"error": "FN_REGISTRY_ROOT not set"})) + sys.exit(2) +for sub in ('pipelines','core','infra'): + sys.path.insert(0, os.path.join(root, 'python', 'functions', sub)) +try: + from cdp_open_url_and_wait import cdp_open_url_and_wait + from cdp_get_ax_tree import cdp_get_ax_tree + from llm_propose_scraping_schema import llm_propose_scraping_schema + url = sys.argv[1] + port = int(sys.argv[2]) + tab_id = cdp_open_url_and_wait(port, url, timeout_s=30) + ax = cdp_get_ax_tree(port, tab_id) + schema = llm_propose_scraping_schema(url, ax) + out = {"tab_id": tab_id} + if isinstance(schema, dict): + out.update(schema) + else: + out["fields"] = schema + print(json.dumps(out)) +except Exception as e: + print(json.dumps({"error": str(e), "trace": traceback.format_exc()})) + sys.exit(1) +)PY"; + std::vector argv; + argv.push_back(py_resolve_interpreter()); + argv.push_back("-c"); + argv.push_back(code); + argv.push_back(url); + argv.push_back(std::to_string(port)); + PyResult r 
= py_run(argv, 120000); + { + std::lock_guard lk(g_ax.mu); + g_ax.raw_python_output = r.stdout_data; + } + if (r.exit_code != 0 || r.stdout_data.empty()) { + std::lock_guard lk(g_ax.mu); + g_ax.last_error = r.error.empty() ? "python exited non-zero" : r.error; + g_ax.status = "Failed"; + g_ax.busy.store(false); + return; + } + // Parse JSON (puede haber varias lineas — tomamos la ultima no vacia). + std::string json_line; + { + std::stringstream ss(r.stdout_data); + std::string line; + while (std::getline(ss, line)) { + if (!line.empty() && line.front() == '{') json_line = line; + } + } + if (json_line.empty()) { + std::lock_guard lk(g_ax.mu); + g_ax.last_error = "no JSON object in stdout"; + g_ax.status = "Failed"; + g_ax.busy.store(false); + return; + } + crude_json::value v = crude_json::value::parse(json_line); + if (!v.is_object()) { + std::lock_guard lk(g_ax.mu); + g_ax.last_error = "stdout is not a JSON object"; + g_ax.status = "Failed"; + g_ax.busy.store(false); + return; + } + if (v.contains("error")) { + std::lock_guard lk(g_ax.mu); + g_ax.last_error = v["error"].is_string() ? 
v["error"].get() : "error"; + g_ax.status = "Failed"; + g_ax.busy.store(false); + return; + } + std::vector fields; + std::string tab_id; + if (v.contains("tab_id") && v["tab_id"].is_string()) tab_id = v["tab_id"].get(); + if (v.contains("fields") && v["fields"].is_array()) { + const auto& arr = v["fields"].get(); + for (size_t i = 0; i < arr.size(); ++i) { + const auto& f = arr[i]; + if (!f.is_object()) continue; + SchemaField sf; + auto getstr = [&](const char* k){ + if (!f.contains(k)) return std::string(); + const auto& x = f[k]; + if (x.is_string()) return x.get(); + if (x.is_null()) return std::string(); + return x.dump(); + }; + sf.field = getstr("field"); + sf.selector = getstr("selector"); + sf.sample = getstr("sample_value"); + if (sf.sample.empty()) sf.sample = getstr("sample"); + sf.type = getstr("type"); + if (sf.type.empty()) sf.type = "string"; + sf.keep = true; + if (!sf.field.empty()) fields.push_back(std::move(sf)); + } + } + { + std::lock_guard lk(g_ax.mu); + g_ax.schema = std::move(fields); + g_ax.proposed_tab_id = tab_id; + g_ax.status = "Schema proposed (" + std::to_string(g_ax.schema.size()) + " fields)"; + } + g_ax.busy.store(false); + }).detach(); +} + +std::string build_extraction_js(const std::vector& schema) { + std::ostringstream js; + js << "(function(){var o={};"; + for (const auto& f : schema) { + if (!f.keep || f.field.empty() || f.selector.empty()) continue; + // Map type -> coerce. 
+ std::string sel = js_escape(f.selector); + js << "try{var e=document.querySelector(\"" << sel << "\");"; + js << "o[\"" << js_escape(f.field) << "\"]="; + if (f.type == "number") { + js << "e?parseFloat((e.innerText||e.textContent||'').replace(/[^0-9.\\-]/g,'')):null;"; + } else if (f.type == "url") { + js << "e?(e.href||e.src||null):null;"; + } else if (f.type == "bool") { + js << "e?true:false;"; + } else { + js << "e?(e.innerText||e.textContent||'').trim():null;"; + } + js << "}catch(_){o[\"" << js_escape(f.field) << "\"]=null;}"; + } + js << "return o;})()"; + return js.str(); +} + +void run_test_extraction(int port, const std::string& tab_id) { + std::vector sc; + { + std::lock_guard lk(g_ax.mu); + sc = g_ax.schema; + } + std::string js = build_extraction_js(sc); + if (g_ax.busy.exchange(true)) return; + { + std::lock_guard lk(g_ax.mu); + g_ax.status = "Running extraction in tab..."; + g_ax.test_output.clear(); + } + std::thread([port, tab_id, js]() { + const char* code = R"PY( +import sys, os, json, traceback +root = os.environ.get('FN_REGISTRY_ROOT', '') +if not root: + print(json.dumps({"error":"FN_REGISTRY_ROOT not set"})); sys.exit(2) +for sub in ('pipelines','core','infra','browser'): + sys.path.insert(0, os.path.join(root, 'python', 'functions', sub)) +try: + port = int(sys.argv[1]) + tab_id = sys.argv[2] + js = sys.argv[3] + # Best-effort: reuse cdp_runtime_evaluate si existe; sino, hablar CDP directo. 
+ try: + from cdp_runtime_evaluate import cdp_runtime_evaluate + out = cdp_runtime_evaluate(port, tab_id, js, return_by_value=True) + except Exception: + import urllib.request, json as _j, websocket + ws_url = None + with urllib.request.urlopen(f'http://127.0.0.1:{port}/json') as r: + for t in _j.loads(r.read()): + if t.get('id') == tab_id: + ws_url = t.get('webSocketDebuggerUrl'); break + if not ws_url: raise RuntimeError('tab not found') + w = websocket.create_connection(ws_url, timeout=10) + w.send(_j.dumps({"id":1,"method":"Runtime.evaluate","params":{ + "expression": js, "returnByValue": True}})) + out = _j.loads(w.recv()).get('result', {}).get('result', {}).get('value') + w.close() + print(json.dumps({"result": out})) +except Exception as e: + print(json.dumps({"error": str(e), "trace": traceback.format_exc()})); sys.exit(1) +)PY"; + std::vector argv; + argv.push_back(py_resolve_interpreter()); + argv.push_back("-c"); + argv.push_back(code); + argv.push_back(std::to_string(port)); + argv.push_back(tab_id); + argv.push_back(js); + PyResult r = py_run(argv, 30000); + { + std::lock_guard lk(g_ax.mu); + if (r.exit_code != 0) { + g_ax.last_error = r.error.empty() ? 
"python exited non-zero" : r.error; + g_ax.status = "Test failed"; + g_ax.test_output = r.stdout_data; + } else { + g_ax.test_output = r.stdout_data; + g_ax.status = "Test OK"; + } + g_ax.raw_python_output = r.stdout_data; + } + g_ax.busy.store(false); + }).detach(); +} + +std::string yaml_quote(const std::string& s) { + bool needs = s.empty() || s.find_first_of(":#\"'") != std::string::npos + || s.find_first_of(" \t") == 0; + if (!needs) return s; + std::string out = "\""; + for (char c : s) { + if (c == '"' || c == '\\') out += '\\'; + out += c; + } + out += "\""; + return out; +} + +bool ensure_dir(const std::string& path) { +#ifdef _WIN32 + return CreateDirectoryA(path.c_str(), nullptr) || GetLastError() == ERROR_ALREADY_EXISTS; +#else + if (mkdir(path.c_str(), 0755) == 0) return true; + return errno == EEXIST; +#endif +} + +void save_recipe(const std::string& name, const std::string& url) { + std::string root = py_resolve_registry_root(); + if (root.empty()) { + std::lock_guard lk(g_ax.mu); + g_ax.last_error = "FN_REGISTRY_ROOT not set; cannot resolve recipes dir"; + g_ax.status = "Save failed"; + return; + } + std::string slug = slugify(name); + std::string sep = +#ifdef _WIN32 + "\\"; +#else + "/"; +#endif + std::string dir = root + sep + "projects" + sep + "navegator" + sep + "profiles" + + sep + "default" + sep + "recipes"; + + // crear directorios padres uno a uno (best effort). 
+#ifdef _WIN32 + { + std::string acc; + for (size_t i = 0; i < dir.size(); ++i) { + if (dir[i] == '\\' && i > 2) { + acc.assign(dir.begin(), dir.begin() + i); + CreateDirectoryA(acc.c_str(), nullptr); + } + } + CreateDirectoryA(dir.c_str(), nullptr); + } +#else + { + std::string acc; + for (size_t i = 0; i < dir.size(); ++i) { + if (dir[i] == '/' && i > 0) { + acc.assign(dir.begin(), dir.begin() + i); + mkdir(acc.c_str(), 0755); + } + } + mkdir(dir.c_str(), 0755); + } +#endif + + std::string path = dir + sep + slug + ".yaml"; + + std::vector sc; + std::string url_used = url; + { + std::lock_guard lk(g_ax.mu); + sc = g_ax.schema; + } + + std::string js = build_extraction_js(sc); + + std::ostringstream y; + y << "name: " << yaml_quote(slug) << "\n"; + y << "url_pattern: " << yaml_quote(url_used) << "\n"; + y << "steps:\n"; + if (!sc.empty() && !sc.front().selector.empty()) { + y << " - wait_selector: " << yaml_quote(sc.front().selector) << "\n"; + } + y << " - js: |\n"; + // indent js with 6 spaces + { + std::stringstream ss(js); + std::string line; + while (std::getline(ss, line)) y << " " << line << "\n"; + } + y << "output:\n"; + y << " schema:\n"; + for (const auto& f : sc) { + if (!f.keep) continue; + y << " - name: " << yaml_quote(f.field) + << " type: " << yaml_quote(f.type) + << " selector: " << yaml_quote(f.selector) << "\n"; + } + y << " sink: data_factory.runs\n"; + + std::ofstream f(path, std::ios::binary); + if (!f) { + std::lock_guard lk(g_ax.mu); + g_ax.last_error = "could not write " + path; + g_ax.status = "Save failed"; + return; + } + f << y.str(); + f.close(); + + // INSERT en data_factory.db via subprocess sqlite3 (NO bloquea si falla). 
+ std::thread([root, slug]() { + const char* code = R"PY( +import sys, os, sqlite3, traceback +try: + root = sys.argv[1] + name = sys.argv[2] + db_path = os.path.join(root, 'apps', 'data_factory', 'data_factory.db') + if not os.path.exists(db_path): + print("data_factory.db not found at " + db_path); sys.exit(0) + conn = sqlite3.connect(db_path) + cur = conn.cursor() + cur.execute("""INSERT OR IGNORE INTO nodes(id, kind, name, function_id, description, + schedule_cron, enabled, tags_csv, created_at, updated_at) + VALUES(?, 'extractor', ?, 'cdp_extract_recipe_py_pipelines', ?, '', + 1, 'navegator,recipe', datetime('now'), datetime('now'))""", + (name, name, "auto-extract recipe " + name)) + conn.commit(); conn.close() + print("ok") +except Exception as e: + print(traceback.format_exc()) +)PY"; + std::vector argv; + argv.push_back(py_resolve_interpreter()); + argv.push_back("-c"); + argv.push_back(code); + argv.push_back(root); + argv.push_back(slug); + (void)py_run(argv, 10000); + }).detach(); + + { + std::lock_guard lk(g_ax.mu); + g_ax.status = "Recipe saved: " + path; + } +} + +} // anon + +void render_autoextract_panel(bool* p_open) { + if (!ImGui::Begin(TI_BOX " AutoExtract", p_open)) { + ImGui::End(); + return; + } + + int port = 0; + { + std::lock_guard lk(g_session().mu); + port = g_session().selected_port; + } + if (port <= 0) { + ImGui::TextDisabled("Select a browser in the Browsers panel."); + ImGui::End(); + return; + } + + ImGui::TextUnformatted("URL:"); + ImGui::SameLine(); + ImGui::SetNextItemWidth(420); + ImGui::InputText("##ax_url", g_ax.url_input, sizeof(g_ax.url_input)); + ImGui::SameLine(); + bool busy = g_ax.busy.load(); + if (busy) ImGui::BeginDisabled(); + if (ImGui::Button(TI_PLAYER_PLAY " Open & Analyze")) { + run_open_and_analyze(port, g_ax.url_input); + } + if (busy) ImGui::EndDisabled(); + + if (busy) { + ImGui::SameLine(); + ImGui::TextDisabled("working..."); + } + + { + std::lock_guard lk(g_ax.mu); + if (!g_ax.status.empty()) 
ImGui::Text("Status: %s", g_ax.status.c_str()); + if (!g_ax.last_error.empty()) { + ImGui::PushStyleColor(ImGuiCol_Text, fn_tokens::colors::error); + ImGui::TextWrapped("Error: %s", g_ax.last_error.c_str()); + ImGui::PopStyleColor(); + } + } + + ImGui::Separator(); + ImGui::TextDisabled("Proposed schema (editable):"); + + std::vector sc_copy; + { + std::lock_guard lk(g_ax.mu); + sc_copy = g_ax.schema; + } + + if (ImGui::BeginTable("##ax_schema", 5, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { + ImGui::TableSetupColumn("field"); + ImGui::TableSetupColumn("selector"); + ImGui::TableSetupColumn("sample"); + ImGui::TableSetupColumn("type"); + ImGui::TableSetupColumn("keep"); + ImGui::TableHeadersRow(); + + bool dirty = false; + for (size_t i = 0; i < sc_copy.size(); ++i) { + ImGui::TableNextRow(); + ImGui::PushID((int)i); + ImGui::TableNextColumn(); + char fb[128]; std::snprintf(fb, sizeof(fb), "%s", sc_copy[i].field.c_str()); + if (ImGui::InputText("##field", fb, sizeof(fb))) { sc_copy[i].field = fb; dirty = true; } + ImGui::TableNextColumn(); + char sb[512]; std::snprintf(sb, sizeof(sb), "%s", sc_copy[i].selector.c_str()); + if (ImGui::InputText("##selector", sb, sizeof(sb))) { sc_copy[i].selector = sb; dirty = true; } + ImGui::TableNextColumn(); + ImGui::TextWrapped("%s", sc_copy[i].sample.c_str()); + ImGui::TableNextColumn(); + char tb[32]; std::snprintf(tb, sizeof(tb), "%s", sc_copy[i].type.c_str()); + if (ImGui::InputText("##type", tb, sizeof(tb))) { sc_copy[i].type = tb; dirty = true; } + ImGui::TableNextColumn(); + bool keep = sc_copy[i].keep; + if (ImGui::Checkbox("##keep", &keep)) { sc_copy[i].keep = keep; dirty = true; } + ImGui::PopID(); + } + ImGui::EndTable(); + + if (dirty) { + std::lock_guard lk(g_ax.mu); + g_ax.schema = sc_copy; + } + } + + ImGui::Separator(); + std::string tab_id; + { + std::lock_guard lk(g_ax.mu); + tab_id = g_ax.proposed_tab_id; + } + if (busy) ImGui::BeginDisabled(); + if (ImGui::Button(TI_FLASK " Test extraction") && 
!tab_id.empty()) { + run_test_extraction(port, tab_id); + } + if (busy) ImGui::EndDisabled(); + ImGui::SameLine(); + ImGui::TextDisabled("Recipe name:"); + ImGui::SameLine(); + ImGui::SetNextItemWidth(200); + ImGui::InputText("##rname", g_ax.recipe_name, sizeof(g_ax.recipe_name)); + ImGui::SameLine(); + if (ImGui::Button(TI_DEVICE_FLOPPY " Save as recipe")) { + if (g_ax.recipe_name[0]) save_recipe(g_ax.recipe_name, g_ax.url_input); + } + + { + std::lock_guard lk(g_ax.mu); + if (!g_ax.test_output.empty()) { + ImGui::Separator(); + ImGui::TextDisabled("Test output:"); + ImGui::InputTextMultiline("##test_out", (char*)g_ax.test_output.c_str(), + g_ax.test_output.size() + 1, + ImVec2(-1, 120), ImGuiInputTextFlags_ReadOnly); + } + } + + ImGui::End(); +} + +} // namespace navegator diff --git a/main.cpp b/main.cpp index 8f56126..2c40a3b 100644 --- a/main.cpp +++ b/main.cpp @@ -28,6 +28,8 @@ void render_browsers_panel(bool* p_open); void render_tabs_panel(bool* p_open); void render_tab_detail_panel(bool* p_open); void render_network_panel(bool* p_open); +void render_autoextract_panel(bool* p_open); +void render_recipes_panel(bool* p_open); // ---------- Visibilidad de paneles ----------------------------------------- bool show_browsers = true; @@ -35,14 +37,18 @@ bool show_tabs = true; bool show_tab_detail = false; bool show_network = false; bool show_agent = false; +bool show_autoextract = false; +bool show_recipes = false; namespace { constexpr fn_ui::PanelToggle k_panels[] = { - {"Browsers", "Ctrl+1", &show_browsers}, - {"Tabs", "Ctrl+2", &show_tabs}, - {"Tab Detail", "Ctrl+3", &show_tab_detail}, - {"Network", "Ctrl+4", &show_network}, - {"Agent", "Ctrl+5", &show_agent}, + {"Browsers", "Ctrl+1", &show_browsers}, + {"Tabs", "Ctrl+2", &show_tabs}, + {"Tab Detail", "Ctrl+3", &show_tab_detail}, + {"Network", "Ctrl+4", &show_network}, + {"Agent", "Ctrl+5", &show_agent}, + {"AutoExtract", "Ctrl+6", &show_autoextract}, + {"Recipes", "Ctrl+7", &show_recipes}, }; } // anon @@ 
-118,14 +124,17 @@ void extra_del(const std::string& name) { // ---------- API publica para tests + main ---------------------------------- std::string capture_panel_state() { - char buf[256]; + char buf[384]; std::snprintf(buf, sizeof(buf), - "{\"browsers\":%d,\"tabs\":%d,\"tab_detail\":%d,\"network\":%d,\"agent\":%d}", + "{\"browsers\":%d,\"tabs\":%d,\"tab_detail\":%d,\"network\":%d,\"agent\":%d," + "\"autoextract\":%d,\"recipes\":%d}", show_browsers ? 1 : 0, show_tabs ? 1 : 0, show_tab_detail ? 1 : 0, show_network ? 1 : 0, - show_agent ? 1 : 0); + show_agent ? 1 : 0, + show_autoextract ? 1 : 0, + show_recipes ? 1 : 0); return buf; } @@ -138,16 +147,18 @@ void apply_panel_state(const std::string& json) { if (p >= json.size()) return def; return json[p] == '1' || (json.compare(p, 4, "true") == 0); }; - show_browsers = pull("browsers", true); - show_tabs = pull("tabs", true); - show_tab_detail = pull("tab_detail", true); - show_network = pull("network", true); - show_agent = pull("agent", false); + show_browsers = pull("browsers", true); + show_tabs = pull("tabs", true); + show_tab_detail = pull("tab_detail", true); + show_network = pull("network", true); + show_agent = pull("agent", false); + show_autoextract = pull("autoextract", false); + show_recipes = pull("recipes", false); } void open_all_panels() { show_browsers = show_tabs = show_tab_detail = show_network = true; - // agent es opt-in: ni save/apply ni reset lo abren por defecto. + // agent / autoextract / recipes son opt-in: no se reabren con Reset. } void setup_layouts(fn::AppConfig& cfg) { @@ -291,11 +302,13 @@ void teardown_layouts() { // — no llamamos DockSpaceOverViewport aqui para no duplicar. 
void render() { using namespace navegator; - if (show_browsers) render_browsers_panel(&show_browsers); - if (show_tabs) render_tabs_panel(&show_tabs); - if (show_tab_detail) render_tab_detail_panel(&show_tab_detail); - if (show_network) render_network_panel(&show_network); - if (show_agent) app_agent::chat_render(&show_agent); + if (show_browsers) render_browsers_panel(&show_browsers); + if (show_tabs) render_tabs_panel(&show_tabs); + if (show_tab_detail) render_tab_detail_panel(&show_tab_detail); + if (show_network) render_network_panel(&show_network); + if (show_agent) app_agent::chat_render(&show_agent); + if (show_autoextract) render_autoextract_panel(&show_autoextract); + if (show_recipes) render_recipes_panel(&show_recipes); } #ifndef FN_TEST_BUILD diff --git a/panels.cpp b/panels.cpp index 06211ce..9c71be9 100644 --- a/panels.cpp +++ b/panels.cpp @@ -20,6 +20,8 @@ #include "local_api.h" #include "cdp_http.h" #include "session_state.h" +#include "picker_state.h" +#include "py_subprocess.h" #include #include @@ -27,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -532,6 +535,56 @@ void render_tab_detail_panel(bool* p_open) { ImGui::Text("Browser :%d", port); ImGui::Text("Tab id %s", sel_id.c_str()); ImGui::TextWrapped("WS %s", sel_ws.c_str()); + ImGui::Separator(); + + // --- Pick element --- + bool active = picker_is_active(); + if (active) ImGui::PushStyleColor(ImGuiCol_Button, fn_tokens::colors::primary); + if (ImGui::Button(active ? (TI_FLASK " Picking... 
(click to stop)") + : (TI_FLASK " Pick element"))) { + if (active) { + picker_stop(); + } else { + std::string err = picker_start(port, sel_id, sel_ws); + if (!err.empty()) { + ImGui::PushStyleColor(ImGuiCol_Text, fn_tokens::colors::error); + ImGui::TextWrapped("Pick error: %s", err.c_str()); + ImGui::PopStyleColor(); + } + } + } + if (active) ImGui::PopStyleColor(); + ImGui::SameLine(); + ImGui::TextDisabled("(injects functions/browser/cdp_pick_element_js.js via CDP)"); + + PickedElement last = picker_last(); + if (last.valid) { + ImGui::Separator(); + ImGui::TextDisabled("Last picked:"); + if (ImGui::BeginChild("##picked_card", ImVec2(0, 110), true)) { + ImGui::Text("tag: %s", last.tag.c_str()); + ImGui::TextWrapped("selector: %s", last.selector.c_str()); + ImGui::TextWrapped("xpath: %s", last.xpath.c_str()); + std::string short_text = last.text; + if (short_text.size() > 200) short_text = short_text.substr(0, 200) + "..."; + ImGui::TextWrapped("text: %s", short_text.c_str()); + } + ImGui::EndChild(); + if (ImGui::SmallButton("Copy selector")) { + ImGui::SetClipboardText(last.selector.c_str()); + } + ImGui::SameLine(); + if (ImGui::SmallButton("Save to recipe (new)")) { + // Placeholder: futura integracion para crear recipe nueva con un + // unico field a partir del selector. Por ahora se copia. + ImGui::SetClipboardText(last.selector.c_str()); + } + ImGui::SameLine(); + if (ImGui::SmallButton("Clear")) picker_clear_last(); + } else { + ImGui::TextDisabled("(no picked element yet — click 'Pick element' and click on the page)"); + } + ImGui::Separator(); ImGui::TextWrapped( "Tab Detail (HTML preview + screenshot + Runtime.evaluate REPL) llega " @@ -744,8 +797,88 @@ void draw_request_detail(const NetworkRequest& r, NetworkSession* net) { ImGui::EndTabItem(); } if (ImGui::BeginTabItem("Response")) { + // Detect JSON response (content-type: application/json). 
+ bool is_json = false; + for (const auto& h : r.response_headers) { + std::string n = h.name; std::transform(n.begin(), n.end(), n.begin(), ::tolower); + if (n == "content-type" && h.value.find("application/json") != std::string::npos) { + is_json = true; break; + } + } if (r.body_fetched && !r.body_text.empty()) { if (ImGui::SmallButton("Copy")) copy_to_clipboard(r.body_text); + if (is_json) { + ImGui::SameLine(); + if (ImGui::SmallButton(TI_LIST_DETAILS " Parse")) { + // Llama infer_json_rows_schema via subprocess. + static std::string g_parsed; // sticky entre frames + g_parsed.clear(); + const char* code = R"PY( +import sys, os, json, traceback +root = os.environ.get('FN_REGISTRY_ROOT', '') +if not root: + print(json.dumps({"error":"FN_REGISTRY_ROOT not set"})); sys.exit(2) +for sub in ('core',): + sys.path.insert(0, os.path.join(root, 'python', 'functions', sub)) +try: + from infer_json_rows_schema import infer_json_rows_schema + body = sys.stdin.read() + obj = json.loads(body) + res = infer_json_rows_schema(obj) + print(json.dumps(res if isinstance(res, dict) else {"result": res})) +except Exception as e: + print(json.dumps({"error": str(e), "trace": traceback.format_exc()})); sys.exit(1) +)PY"; + std::vector argv; + argv.push_back(py_resolve_interpreter()); + argv.push_back("-c"); + argv.push_back(code); + // Lanza un thread y deja log en g_net_ui.* via clipboard (simple). + std::string body = r.body_text; + std::thread([argv, body]() { + (void)argv; (void)body; + // py_run no soporta stdin todavia; usamos un archivo temporal. + // Para mantener el patch minimo: escribimos body a archivo temp, + // y pasamos su path como argv extra; el script lo lee. + char tmp[256]; + std::snprintf(tmp, sizeof(tmp), "%s%snav_body_%lld.json", +#ifdef _WIN32 + getenv("TEMP") ? 
getenv("TEMP") : ".", "\\", +#else + "/tmp", "/", +#endif + (long long)std::time(nullptr)); + { + std::ofstream f(tmp, std::ios::binary); + if (f) f.write(body.data(), body.size()); + } + const char* code2 = R"PY( +import sys, os, json, traceback +root = os.environ.get('FN_REGISTRY_ROOT', '') +if not root: + print(json.dumps({"error":"FN_REGISTRY_ROOT not set"})); sys.exit(2) +for sub in ('core',): + sys.path.insert(0, os.path.join(root, 'python', 'functions', sub)) +try: + from infer_json_rows_schema import infer_json_rows_schema + with open(sys.argv[1], 'rb') as f: body = f.read().decode('utf-8','replace') + obj = json.loads(body) + res = infer_json_rows_schema(obj) + print(json.dumps(res if isinstance(res, dict) else {"result": res})) +except Exception as e: + print(json.dumps({"error": str(e), "trace": traceback.format_exc()})); sys.exit(1) +)PY"; + std::vector a2 = { + py_resolve_interpreter(), "-c", code2, tmp + }; + PyResult pr = py_run(a2, 30000); + ImGui::SetClipboardText(pr.stdout_data.c_str()); + std::remove(tmp); + }).detach(); + } + ImGui::SameLine(); + ImGui::TextDisabled("(result -> clipboard)"); + } ImGui::Separator(); ImGui::InputTextMultiline("##body", (char*)r.body_text.c_str(), r.body_text.size() + 1, ImVec2(-1, -1), ImGuiInputTextFlags_ReadOnly); diff --git a/picker_state.cpp b/picker_state.cpp new file mode 100644 index 0000000..3cbd8ff --- /dev/null +++ b/picker_state.cpp @@ -0,0 +1,197 @@ +#include "picker_state.h" + +#include "cdp_ws.h" +#include "py_subprocess.h" // for py_resolve_registry_root + +// crude_json del vendor imgui-node-editor (ya linkado por CMakeLists.txt). 
+#include "crude_json.h"
+
+// NOTE(review): the targets of the bare #include lines below (and the template
+// arguments further down) are missing in this copy of the patch — confirm
+// against the real file before applying.
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace navegator {
+
+namespace {
+
+// Picker state shared between the UI thread and the pump thread.
+std::mutex g_mu;                        // guards g_ws and g_last
+std::unique_ptr g_ws;                   // picker's WebSocket; reset by picker_stop
+std::atomic g_active{false};            // true from picker_start until the pump exits
+std::atomic g_stop_pump{false};         // asks pump_loop to leave its loop
+std::thread g_pump_thread;              // runs pump_loop; joined in picker_stop
+PickedElement g_last;                   // most recent pick, copied out by picker_last
+
+// Reads a whole file in binary mode; returns "" if it cannot be opened.
+std::string slurp(const std::string& path) {
+    std::ifstream f(path, std::ios::binary);
+    if (!f) return "";
+    std::ostringstream ss; ss << f.rdbuf();
+    return ss.str();
+}
+
+// JS-string-literal escape (NOT JSON encoding — the JSON wrapper is added when the params are built).
+// Escapes backslash, double quote, \n, \r, \t and emits \u00XX for any other byte below 0x20.
+std::string js_str_escape(const std::string& s) {
+    std::string out; out.reserve(s.size() + 8);
+    for (char c : s) {
+        switch (c) {
+            case '\\': out += "\\\\"; break;
+            case '"': out += "\\\""; break;
+            case '\n': out += "\\n"; break;
+            case '\r': out += "\\r"; break;
+            case '\t': out += "\\t"; break;
+            default:
+                if ((unsigned char)c < 0x20) {
+                    char buf[8]; std::snprintf(buf, sizeof(buf), "\\u%04x", (unsigned)c);
+                    out += buf;
+                } else out += c;
+        }
+    }
+    return out;
+}
+
+// Pump thread body. Every 80 ms it drains up to 64 messages from g_ws (under
+// g_mu), looks for the picker's console message and publishes the decoded
+// element in g_last. Leaves when g_stop_pump is set or the socket is missing or
+// disconnected, and clears g_active on the way out.
+void pump_loop() {
+    while (!g_stop_pump.load()) {
+        std::vector msgs;
+        {
+            std::lock_guard lk(g_mu);
+            if (!g_ws || !g_ws->is_connected()) break;
+            msgs = g_ws->drain(64);
+        }
+        for (const auto& m : msgs) {
+            crude_json::value v = crude_json::value::parse(m);
+            if (!v.is_object()) continue;
+            // Keep only method=Runtime.consoleAPICalled with args[0].value=="__fn_picked__"
+            if (!v.contains("method")) continue;
+            const auto& method = v["method"];
+            if (!method.is_string()) continue;
+            if (method.get() != "Runtime.consoleAPICalled") continue;
+            if (!v.contains("params")) continue;
+            const auto& params = v["params"];
+            if (!params.is_object() || !params.contains("args")) continue;
+            const auto& args = params["args"];
+            if (!args.is_array()) continue;
+            const auto& args_arr = args.get();
+            if (args_arr.size() < 2) continue;
+            const auto& a0 = args_arr[0];
+            if (!a0.is_object() || !a0.contains("value")) continue;
+            const auto& v0 = a0["value"];
+            if (!v0.is_string() || v0.get() != "__fn_picked__") continue;
+            const auto& a1 = args_arr[1];
+            if (!a1.is_object() || !a1.contains("value")) continue;
+            const auto& v1 = a1["value"];
+            // v1.value may be a string (serialized JSON) or an object. The JS
+            // does console.log("__fn_picked__", JSON.stringify(payload)).
+            std::string payload;
+            if (v1.is_string()) payload = v1.get();
+            else payload = v1.dump();
+            crude_json::value p = crude_json::value::parse(payload);
+            if (!p.is_object()) continue;
+            PickedElement el;
+            // Missing key or null -> "", string -> as is, anything else -> JSON dump.
+            auto get = [&](const char* k) -> std::string {
+                if (!p.contains(k)) return "";
+                const auto& x = p[k];
+                if (x.is_string()) return x.get();
+                if (x.is_null()) return "";
+                return x.dump();
+            };
+            el.selector = get("selector");
+            el.xpath = get("xpath");
+            el.tag = get("tag");
+            el.text = get("text");
+            if (p.contains("rect")) {
+                el.rect_str = p["rect"].dump();
+            }
+            el.valid = true;
+            {
+                std::lock_guard lk(g_mu);
+                g_last = std::move(el);
+            }
+        }
+        std::this_thread::sleep_for(std::chrono::milliseconds(80));
+    }
+    g_active.store(false);
+}
+
+} // anon
+
+// Loads the picker payload from <registry root>/functions/browser/
+// cdp_pick_element_js.js. Returns "" when the root cannot be resolved or the
+// file cannot be read.
+std::string picker_load_js() {
+    std::string root = py_resolve_registry_root();
+    if (root.empty()) return "";
+#ifdef _WIN32
+    std::string path = root + "\\functions\\browser\\cdp_pick_element_js.js";
+#else
+    std::string path = root + "/functions/browser/cdp_pick_element_js.js";
+#endif
+    return slurp(path);
+}
+
+// Starts pick mode on the tab behind `ws_url`: stops any previous session,
+// connects a dedicated WebSocket, enables Runtime, injects the picker JS and
+// starts the pump thread. The port and tab_id parameters are unused. Returns ""
+// on success or an error message.
+std::string picker_start(int /*port*/, const std::string& /*tab_id*/, const std::string& ws_url) {
+    if (ws_url.empty()) return "no ws_url";
+    picker_stop();
+    std::string js = picker_load_js();
+    if (js.empty()) return "could not load cdp_pick_element_js.js (set FN_REGISTRY_ROOT)";
+
+    std::string host, path;
+    int p = 0;
+    if (!CdpWs::parse_ws_url(ws_url, host, p, path)) return "invalid ws_url";
+
+    auto ws = std::make_unique();
+    CdpWsConfig cfg;
+    cfg.host = host;
+    cfg.port = p;
+    cfg.path = path;
+    cfg.timeout_ms = 5000;
+    std::string err;
+    if (!ws->connect(cfg, &err)) return "ws connect failed: " + err;
+
+    // Enable Runtime (required to receive consoleAPICalled).
+    ws->send_command("Runtime.enable", "");
+
+    // Inject the JS via Runtime.evaluate; `expression` is the code itself.
+    // The JS payload ends in an IIFE; it is wrapped without returnByValue.
+    std::ostringstream params;
+    params << "{\"expression\":\"" << js_str_escape(js)
+           << "\",\"includeCommandLineAPI\":true"
+           << ",\"awaitPromise\":false"
+           << ",\"returnByValue\":false}";
+    ws->send_command("Runtime.evaluate", params.str());
+
+    {
+        std::lock_guard lk(g_mu);
+        g_ws = std::move(ws);
+    }
+    g_stop_pump.store(false);
+    g_active.store(true);
+    g_pump_thread = std::thread(pump_loop);
+    return "";
+}
+
+// Stops pick mode: signals the pump, closes the socket, joins the pump thread,
+// then destroys the socket. Safe to call when nothing is running.
+void picker_stop() {
+    g_stop_pump.store(true);
+    {
+        std::lock_guard lk(g_mu);
+        if (g_ws) g_ws->close();
+    }
+    if (g_pump_thread.joinable()) g_pump_thread.join();
+    {
+        std::lock_guard lk(g_mu);
+        g_ws.reset();
+    }
+    g_active.store(false);
+}
+
+// True while a pick session is running.
+bool picker_is_active() { return g_active.load(); }
+
+// Copy of the most recent pick, taken under g_mu.
+PickedElement picker_last() {
+    std::lock_guard lk(g_mu);
+    return g_last;
+}
+
+// Resets the most recent pick to a default (invalid) element.
+void picker_clear_last() {
+    std::lock_guard lk(g_mu);
+    g_last = PickedElement{};
+}
+
+} // namespace navegator
diff --git a/picker_state.h b/picker_state.h
new file mode 100644
index 0000000..8d7fb6d
--- /dev/null
+++ b/picker_state.h
@@ -0,0 +1,51 @@
+#pragma once
+
+// picker_state — Pick element: injects JS via CDP Runtime.evaluate, listens for
+// `Runtime.consoleAPICalled` filtered by args[0].value == "__fn_picked__", and
+// publishes the last chosen element so the Tab Detail panel can render it.
+//
+// Decisions:
+//   - Own WS (CdpWs) per panel; it does not share the NetworkSession one so
+//     Runtime.* is not interleaved with Network.*.
+//   - The JS payload is read from ${FN_REGISTRY_ROOT}/functions/browser/cdp_pick_element_js.js
+//     (path from env, hard-coded fallback relative to the exe).
+//   - Thread-safe global state.
+ +#include +#include +#include +#include + +namespace navegator { + +struct PickedElement { + std::string selector; + std::string xpath; + std::string tag; + std::string text; + std::string rect_str; // "x,y,w,h" formato libre + bool valid = false; +}; + +class CdpWs; // fwd + +// Inicia modo pick para el tab dado. Si ya esta activo, reactiva. +// Carga el JS desde el path de funciones del registry. Devuelve "" si OK, +// error en caso contrario. +std::string picker_start(int port, const std::string& tab_id, const std::string& ws_url); + +// Para el modo pick (cierra WS). +void picker_stop(); + +bool picker_is_active(); + +// Snapshot atomico del ultimo elemento capturado. +PickedElement picker_last(); + +// Limpia el ultimo capturado. +void picker_clear_last(); + +// Loader del payload JS. Devuelve "" si no se pudo leer. +std::string picker_load_js(); + +} // namespace navegator diff --git a/py_subprocess.cpp b/py_subprocess.cpp new file mode 100644 index 0000000..19d20f7 --- /dev/null +++ b/py_subprocess.cpp @@ -0,0 +1,239 @@ +#include "py_subprocess.h" + +#include "app_base.h" + +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# define WIN32_LEAN_AND_MEAN +# include +#else +# include +# include +# include +#endif + +namespace navegator { + +namespace { + +bool file_exists(const std::string& p) { + if (p.empty()) return false; + FILE* f = std::fopen(p.c_str(), "rb"); + if (!f) return false; + std::fclose(f); + return true; +} + +std::string getenv_str(const char* name) { + const char* v = std::getenv(name); + return v ? std::string(v) : std::string(); +} + +} // anon + +std::string py_resolve_registry_root() { + std::string s = getenv_str("FN_REGISTRY_ROOT"); + if (!s.empty()) return s; + + // Fallback: deducir desde exe_dir subiendo hacia el repo. Por defecto + // la app vive en projects/navegator/apps//, asi que 4 niveles + // arriba esta la raiz. 
+ std::string exe = fn::exe_dir(); + if (exe.empty()) return ""; + + // Si la app esta en Desktop\apps\\, no podemos deducir — devolver "". + // El user debe setear FN_REGISTRY_ROOT en el entorno. + if (exe.find("Desktop") != std::string::npos) return ""; + + // Subir 4 niveles. + std::string p = exe; + for (int i = 0; i < 4; ++i) { + auto pos = p.find_last_of("/\\"); + if (pos == std::string::npos) return ""; + p = p.substr(0, pos); + } + return p; +} + +std::string py_resolve_interpreter() { + std::string root = py_resolve_registry_root(); + if (!root.empty()) { +#ifdef _WIN32 + std::string venv_py = root + "\\python\\.venv\\Scripts\\python.exe"; + if (file_exists(venv_py)) return venv_py; +#else + std::string venv_py = root + "/python/.venv/bin/python3"; + if (file_exists(venv_py)) return venv_py; +#endif + } +#ifdef _WIN32 + return "python"; // confiar en PATH (py launcher o python.exe) +#else + return "python3"; +#endif +} + +// --------------------------------------------------------------------------- +// Windows impl +// --------------------------------------------------------------------------- +#ifdef _WIN32 + +namespace { + +std::string quote_arg_win(const std::string& a) { + bool need_q = a.empty() || a.find_first_of(" \t\"") != std::string::npos; + if (!need_q) return a; + std::string out; + out.reserve(a.size() + 4); + out += '"'; + for (char c : a) { + if (c == '"') out += "\\\""; + else if (c == '\\') { out += "\\\\"; } + else out += c; + } + out += '"'; + return out; +} + +} // anon + +PyResult py_run(const std::vector& argv, int timeout_ms) { + PyResult res; + if (argv.empty()) { res.error = "argv empty"; return res; } + + std::string cmd; + for (size_t i = 0; i < argv.size(); ++i) { + if (i) cmd += ' '; + cmd += quote_arg_win(argv[i]); + } + + HANDLE r_pipe = nullptr; + HANDLE w_pipe = nullptr; + SECURITY_ATTRIBUTES sa{}; + sa.nLength = sizeof(sa); + sa.bInheritHandle = TRUE; + if (!CreatePipe(&r_pipe, &w_pipe, &sa, 0)) { + res.error = 
"CreatePipe failed"; + return res; + } + SetHandleInformation(r_pipe, HANDLE_FLAG_INHERIT, 0); + + STARTUPINFOA si{}; + si.cb = sizeof(si); + si.dwFlags = STARTF_USESTDHANDLES | STARTF_USESHOWWINDOW; + si.wShowWindow = SW_HIDE; + si.hStdOutput = w_pipe; + si.hStdError = w_pipe; + si.hStdInput = GetStdHandle(STD_INPUT_HANDLE); + + PROCESS_INFORMATION pi{}; + std::string mutable_cmd = cmd; + BOOL ok = CreateProcessA( + nullptr, mutable_cmd.data(), + nullptr, nullptr, TRUE, + CREATE_NO_WINDOW, + nullptr, nullptr, &si, &pi); + CloseHandle(w_pipe); + if (!ok) { + CloseHandle(r_pipe); + DWORD e = GetLastError(); + char buf[128]; + std::snprintf(buf, sizeof(buf), "CreateProcess failed err=%lu cmd=%s", + (unsigned long)e, cmd.c_str()); + res.error = buf; + return res; + } + + // Lector + timeout: spawn thread lector, wait padre con timeout. + std::atomic done{false}; + std::string out; + std::thread reader([&]() { + char buf[4096]; + DWORD n = 0; + while (ReadFile(r_pipe, buf, sizeof(buf), &n, nullptr) && n > 0) { + out.append(buf, n); + } + done.store(true); + }); + + DWORD waited = WaitForSingleObject(pi.hProcess, + timeout_ms > 0 ? (DWORD)timeout_ms : INFINITE); + if (waited == WAIT_TIMEOUT) { + TerminateProcess(pi.hProcess, 1); + res.error = "timeout"; + } + + DWORD exit_code = 0; + GetExitCodeProcess(pi.hProcess, &exit_code); + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + + // Pipe se cerrara cuando el proceso terminado libere los handles. + // Cerrar nuestro extremo para desbloquear lector si esta colgado. + CloseHandle(r_pipe); + if (reader.joinable()) reader.join(); + + res.exit_code = (int)exit_code; + res.stdout_data = std::move(out); + return res; +} + +#else // POSIX + +PyResult py_run(const std::vector& argv, int timeout_ms) { + PyResult res; + if (argv.empty()) { res.error = "argv empty"; return res; } + + // Build "cmd args..." via popen for simplicity. Escapado minimo. 
+ std::string cmd; + for (size_t i = 0; i < argv.size(); ++i) { + if (i) cmd += ' '; + cmd += "'"; + for (char c : argv[i]) { + if (c == '\'') cmd += "'\\''"; + else cmd += c; + } + cmd += "'"; + } + cmd += " 2>&1"; + (void)timeout_ms; + + FILE* pipe = popen(cmd.c_str(), "r"); + if (!pipe) { res.error = "popen failed"; return res; } + std::string out; + char buf[4096]; + while (fgets(buf, sizeof(buf), pipe)) out.append(buf); + int rc = pclose(pipe); + res.exit_code = WIFEXITED(rc) ? WEXITSTATUS(rc) : -1; + res.stdout_data = std::move(out); + return res; +} + +#endif + +PyResult py_run_inline(const std::string& code, const std::vector& extra_args, + int timeout_ms) { + std::vector argv; + argv.push_back(py_resolve_interpreter()); + argv.push_back("-c"); + argv.push_back(code); + for (const auto& a : extra_args) argv.push_back(a); + return py_run(argv, timeout_ms); +} + +void py_run_async(const std::vector& argv, int timeout_ms, + std::function on_done) { + std::thread([argv, timeout_ms, cb = std::move(on_done)]() { + PyResult r = py_run(argv, timeout_ms); + if (cb) cb(std::move(r)); + }).detach(); +} + +} // namespace navegator diff --git a/py_subprocess.h b/py_subprocess.h new file mode 100644 index 0000000..af5ec97 --- /dev/null +++ b/py_subprocess.h @@ -0,0 +1,53 @@ +#pragma once + +// py_subprocess — spawn Python con args y capturar stdout. Lo usan los paneles +// AutoExtract y Recipes para invocar funciones del registry (cdp_open_url_and_wait, +// cdp_get_ax_tree, llm_propose_scraping_schema, cdp_extract_recipe, infer_json_rows_schema). +// +// Decisiones: +// - Heredoc inline: el script Python se pasa via -c "" para evitar archivos temporales. +// - PATH: usa "python3" o "python". Fallback: ${FN_REGISTRY_ROOT}/python/.venv/Scripts/python.exe +// (Windows venv layout) o /python/.venv/bin/python3 (POSIX). +// - Stdout: capturado completo. El llamante parsea JSON. +// - Stderr: redirigido a stdout para facilitar diagnostico (logs visibles). 
+// - Sin consola visible en Windows (CREATE_NO_WINDOW). +// - Async wrapper opcional: lanzar en thread y publicar resultado via callback. + +#include +#include +#include + +namespace navegator { + +struct PyResult { + int exit_code = -1; + std::string stdout_data; + std::string error; // mensaje propio si CreateProcess/popen fallo +}; + +// Devuelve la ruta al interprete python a usar. Prioridad: +// 1. ${FN_REGISTRY_ROOT}/python/.venv/Scripts/python.exe (Windows) +// 2. ${FN_REGISTRY_ROOT}/python/.venv/bin/python3 (POSIX/MinGW) +// 3. "python3" en PATH +// 4. "python" en PATH (Windows default) +std::string py_resolve_interpreter(); + +// Devuelve FN_REGISTRY_ROOT. Si no esta seteada, intenta deducirla: +// - Working dir del exe ".../fn_registry/projects/navegator/apps/". +// - Subiendo 4 niveles desde exe_dir. +std::string py_resolve_registry_root(); + +// Lanza python con argv. argv[0] DEBE ser el interprete (de py_resolve_interpreter()). +// Hereda env. Timeout en ms (0 = sin timeout). Devuelve PyResult con stdout + exit. +PyResult py_run(const std::vector& argv, int timeout_ms = 60000); + +// Helper: ejecuta un script inline via `python -c ""` con args extra. +PyResult py_run_inline(const std::string& code, const std::vector& extra_args, + int timeout_ms = 60000); + +// Async: ejecuta en thread y llama on_done en el thread del worker. +// Captura args por valor; el caller debe sincronizar acceso compartido. +void py_run_async(const std::vector& argv, int timeout_ms, + std::function on_done); + +} // namespace navegator diff --git a/recipes_panel.cpp b/recipes_panel.cpp new file mode 100644 index 0000000..0ed863b --- /dev/null +++ b/recipes_panel.cpp @@ -0,0 +1,313 @@ +// recipes_panel — Listado de recetas (YAML) en +// projects/navegator/profiles/default/recipes/*.yaml. +// +// Acciones por fila: +// Run -> subprocess Python con cdp_extract_recipe (record_run=True). +// Edit -> abre InputTextMultiline con el YAML; "Save" reescribe. 
+// Delete -> rm + refresh list. +// Open in data_factory -> noop (placeholder; mostraria link/cmd). + +#include "imgui.h" +#include "core/icons_tabler.h" +#include "core/tokens.h" + +#include "py_subprocess.h" +#include "session_state.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# define WIN32_LEAN_AND_MEAN +# include +#else +# include +# include +#endif + +namespace navegator { + +namespace { + +struct RecipeRow { + std::string name; + std::string url_pattern; + std::string yaml_path; + std::string last_run_status; + std::string last_run_at; + int rows_last_run = 0; +}; + +struct RecipesState { + std::mutex mu; + std::vector rows; + std::string status; + std::string last_error; + std::atomic busy{false}; + int editing_idx = -1; + std::string edit_buf; + char edit_textarea[16384] = {0}; +}; +RecipesState g_rs; + +std::string recipes_dir() { + std::string root = py_resolve_registry_root(); + if (root.empty()) return ""; +#ifdef _WIN32 + return root + "\\projects\\navegator\\profiles\\default\\recipes"; +#else + return root + "/projects/navegator/profiles/default/recipes"; +#endif +} + +std::string slurp(const std::string& path) { + std::ifstream f(path, std::ios::binary); + if (!f) return ""; + std::ostringstream ss; ss << f.rdbuf(); + return ss.str(); +} + +// Mini-parser YAML especifico: solo extrae name + url_pattern. +void parse_recipe_min(const std::string& body, RecipeRow& r) { + std::stringstream ss(body); + std::string line; + while (std::getline(ss, line)) { + auto strip = [](std::string s){ + size_t a = s.find_first_not_of(" \t"); + size_t b = s.find_last_not_of(" \t\r"); + return (a == std::string::npos) ? 
std::string() : s.substr(a, b - a + 1); + }; + if (line.rfind("name:", 0) == 0) { + r.name = strip(line.substr(5)); + if (!r.name.empty() && (r.name.front()=='"' || r.name.front()=='\'')) { + r.name = r.name.substr(1, r.name.size() - 2); + } + } else if (line.rfind("url_pattern:", 0) == 0) { + r.url_pattern = strip(line.substr(12)); + if (!r.url_pattern.empty() && (r.url_pattern.front()=='"' || r.url_pattern.front()=='\'')) { + r.url_pattern = r.url_pattern.substr(1, r.url_pattern.size() - 2); + } + } + } +} + +std::vector list_yaml_files(const std::string& dir) { + std::vector out; +#ifdef _WIN32 + std::string pattern = dir + "\\*.yaml"; + WIN32_FIND_DATAA fd; + HANDLE h = FindFirstFileA(pattern.c_str(), &fd); + if (h == INVALID_HANDLE_VALUE) return out; + do { + out.push_back(dir + "\\" + fd.cFileName); + } while (FindNextFileA(h, &fd)); + FindClose(h); +#else + DIR* d = opendir(dir.c_str()); + if (!d) return out; + while (auto e = readdir(d)) { + std::string n = e->d_name; + if (n.size() > 5 && n.substr(n.size() - 5) == ".yaml") { + out.push_back(dir + "/" + n); + } + } + closedir(d); +#endif + std::sort(out.begin(), out.end()); + return out; +} + +void refresh_list() { + std::string dir = recipes_dir(); + if (dir.empty()) { + std::lock_guard lk(g_rs.mu); + g_rs.last_error = "FN_REGISTRY_ROOT not set"; + return; + } + auto files = list_yaml_files(dir); + std::vector rows; + for (const auto& f : files) { + RecipeRow r; r.yaml_path = f; + std::string body = slurp(f); + parse_recipe_min(body, r); + if (r.name.empty()) { + // fallback al basename sin ext + size_t p1 = f.find_last_of("/\\"); + std::string base = (p1 == std::string::npos) ? f : f.substr(p1 + 1); + if (base.size() > 5) base = base.substr(0, base.size() - 5); + r.name = base; + } + rows.push_back(std::move(r)); + } + // Anota last_run_* desde data_factory.runs (subprocess sqlite3 best-effort). + // Lo dejamos como TODO — la primera version queda con campos vacios. 
+ std::lock_guard lk(g_rs.mu); + g_rs.rows = std::move(rows); + g_rs.last_error.clear(); + g_rs.status = "Listed " + std::to_string(g_rs.rows.size()) + " recipes"; +} + +void run_recipe_async(const std::string& yaml_path) { + if (g_rs.busy.exchange(true)) return; + { + std::lock_guard lk(g_rs.mu); + g_rs.status = "Running " + yaml_path; + } + std::thread([yaml_path]() { + const char* code = R"PY( +import sys, os, json, traceback +root = os.environ.get('FN_REGISTRY_ROOT', '') +if not root: + print(json.dumps({"error":"FN_REGISTRY_ROOT not set"})); sys.exit(2) +for sub in ('pipelines','core','infra'): + sys.path.insert(0, os.path.join(root, 'python', 'functions', sub)) +try: + from cdp_extract_recipe import cdp_extract_recipe + path = sys.argv[1] + res = cdp_extract_recipe(path, debug_port=9222, record_run=True) + print(json.dumps(res if isinstance(res, dict) else {"result": res})) +except Exception as e: + print(json.dumps({"error": str(e), "trace": traceback.format_exc()})); sys.exit(1) +)PY"; + std::vector argv; + argv.push_back(py_resolve_interpreter()); + argv.push_back("-c"); + argv.push_back(code); + argv.push_back(yaml_path); + PyResult r = py_run(argv, 120000); + { + std::lock_guard lk(g_rs.mu); + if (r.exit_code != 0) { + g_rs.last_error = r.error.empty() ? 
"python exited non-zero" : r.error; + g_rs.status = "Run failed"; + } else { + g_rs.status = "Run OK: " + r.stdout_data.substr(0, 200); + } + } + g_rs.busy.store(false); + refresh_list(); + }).detach(); +} + +void delete_recipe(const std::string& path) { + std::remove(path.c_str()); + refresh_list(); +} + +} // anon + +void render_recipes_panel(bool* p_open) { + if (!ImGui::Begin(TI_LIST_DETAILS " Recipes", p_open)) { + ImGui::End(); + return; + } + + if (ImGui::Button(TI_REFRESH " Refresh")) refresh_list(); + ImGui::SameLine(); + { + std::lock_guard lk(g_rs.mu); + if (!g_rs.status.empty()) ImGui::Text("%s", g_rs.status.c_str()); + if (!g_rs.last_error.empty()) { + ImGui::PushStyleColor(ImGuiCol_Text, fn_tokens::colors::error); + ImGui::TextWrapped("Error: %s", g_rs.last_error.c_str()); + ImGui::PopStyleColor(); + } + } + ImGui::Separator(); + + std::vector rows_copy; + int editing_idx = -1; + { + std::lock_guard lk(g_rs.mu); + rows_copy = g_rs.rows; + editing_idx = g_rs.editing_idx; + } + + if (rows_copy.empty()) { + ImGui::TextDisabled("No recipes in projects/navegator/profiles/default/recipes/."); + ImGui::TextDisabled("Use AutoExtract panel to create one."); + } else if (ImGui::BeginTable("##recipes_tbl", 6, + ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { + ImGui::TableSetupColumn("name"); + ImGui::TableSetupColumn("url_pattern"); + ImGui::TableSetupColumn("last_status"); + ImGui::TableSetupColumn("last_at"); + ImGui::TableSetupColumn("rows"); + ImGui::TableSetupColumn("actions"); + ImGui::TableHeadersRow(); + + for (size_t i = 0; i < rows_copy.size(); ++i) { + const RecipeRow& r = rows_copy[i]; + ImGui::TableNextRow(); + ImGui::PushID((int)i); + ImGui::TableNextColumn(); + ImGui::TextUnformatted(r.name.c_str()); + ImGui::TableNextColumn(); + ImGui::TextWrapped("%s", r.url_pattern.c_str()); + ImGui::TableNextColumn(); + ImGui::TextUnformatted(r.last_run_status.empty() ? 
"-" : r.last_run_status.c_str()); + ImGui::TableNextColumn(); + ImGui::TextUnformatted(r.last_run_at.empty() ? "-" : r.last_run_at.c_str()); + ImGui::TableNextColumn(); + ImGui::Text("%d", r.rows_last_run); + ImGui::TableNextColumn(); + if (ImGui::SmallButton("Run")) run_recipe_async(r.yaml_path); + ImGui::SameLine(); + if (ImGui::SmallButton("Edit")) { + std::string body = slurp(r.yaml_path); + std::lock_guard lk(g_rs.mu); + g_rs.editing_idx = (int)i; + g_rs.edit_buf = body; + std::snprintf(g_rs.edit_textarea, sizeof(g_rs.edit_textarea), + "%s", body.c_str()); + } + ImGui::SameLine(); + if (ImGui::SmallButton("Delete")) delete_recipe(r.yaml_path); + ImGui::SameLine(); + if (ImGui::SmallButton("Open in data_factory")) { + // placeholder — solo loguea + std::lock_guard lk(g_rs.mu); + g_rs.status = "open in data_factory: " + r.name + " (not wired)"; + } + ImGui::PopID(); + } + ImGui::EndTable(); + } + + if (editing_idx >= 0 && editing_idx < (int)rows_copy.size()) { + ImGui::Separator(); + ImGui::Text("Editing: %s", rows_copy[editing_idx].yaml_path.c_str()); + ImGui::InputTextMultiline("##rec_edit", g_rs.edit_textarea, + sizeof(g_rs.edit_textarea), + ImVec2(-1, 220)); + if (ImGui::Button(TI_DEVICE_FLOPPY " Save")) { + std::ofstream f(rows_copy[editing_idx].yaml_path, std::ios::binary); + if (f) { + f << g_rs.edit_textarea; + f.close(); + std::lock_guard lk(g_rs.mu); + g_rs.status = "Saved " + rows_copy[editing_idx].yaml_path; + g_rs.editing_idx = -1; + } + refresh_list(); + } + ImGui::SameLine(); + if (ImGui::Button("Cancel")) { + std::lock_guard lk(g_rs.mu); + g_rs.editing_idx = -1; + } + } + + ImGui::End(); +} + +} // namespace navegator