commit f7923de9ff1ca7934354aa17500e74d0ef56f0b9 Author: fn-registry agent Date: Sat May 9 18:11:22 2026 +0200 chore: sync from fn-registry agent diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..44dad20 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,34 @@ +cmake_minimum_required(VERSION 3.20) + +# odr_console — lanzador GUI + bucle reactivo 5 pasos. Issue 0066. +# Stack: ImGui + SQLite (registry.db RO). DuckDB y jobs_pool: fases siguientes. + +# SQLite3: prefer parent target. Fallback a vendored. +find_package(SQLite3 QUIET) +if(NOT SQLite3_FOUND AND NOT TARGET sqlite3_vendored) + set(SQLITE3_AMALG_DIR ${CMAKE_SOURCE_DIR}/vendor/sqlite3) + add_library(sqlite3_vendored STATIC ${SQLITE3_AMALG_DIR}/sqlite3.c) + target_include_directories(sqlite3_vendored PUBLIC ${SQLITE3_AMALG_DIR}) + target_compile_definitions(sqlite3_vendored PRIVATE + SQLITE_THREADSAFE=1 + SQLITE_ENABLE_FTS5 + SQLITE_ENABLE_JSON1 + ) + add_library(SQLite::SQLite3 ALIAS sqlite3_vendored) +endif() + +add_imgui_app(odr_console + main.cpp + data_registry.cpp +) + +target_include_directories(odr_console PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/vendor/sqlite3 +) + +target_link_libraries(odr_console PRIVATE SQLite::SQLite3) + +if(WIN32) + set_target_properties(odr_console PROPERTIES WIN32_EXECUTABLE TRUE) +endif() diff --git a/app.md b/app.md new file mode 100644 index 0000000..5f45642 --- /dev/null +++ b/app.md @@ -0,0 +1,69 @@ +--- +name: odr_console +lang: cpp +domain: tools +description: "Lanzador GUI de funciones del registry para recolectar datos online. Panel de busqueda FTS5, jobs queue async (workers concurrentes), pipeline builder DAG, browser DuckDB, assertions/proposals. Aplica bucle reactivo de 5 pasos sobre operations.db propia." 
+tags: [imgui, data-collection, scraping, duckdb, jobs, cdp] +uses_functions: [] +uses_types: [] +framework: "imgui" +entry_point: "main.cpp" +dir_path: "projects/online_data_recopilation/apps/odr_console" +repo_url: "" +--- + +## Notas + +App C++ ImGui que orquesta: + +1. **Launcher panel** — busqueda FTS5 sobre `registry.db`. Lanza cualquier funcion/pipeline con form auto-generado desde `params_schema`. +2. **Pipeline builder** — DAG visual con `imgui_node_editor`. Compone collectors validando composabilidad (`returns` ↔ `uses_types`). Persiste en `operations.relations` con status `designed`. +3. **Jobs queue** — Pool de N workers (default 4). Cada job = subprocess Python collector. Live progress por panel (`PROGRESS:` en stderr). Reusa `jobs_pool_cpp_core` (extraido de osint_graph en issue 0065). +4. **Datasets browser** — Panel DuckDB embebido. Query editor + tabla preview + ImPlot para charts. Lee parquet de `vaults/odr_data/`. +5. **Entities + assertions** — Vista de `operations.entities` por dataset. Editor SQL para assertions. Boton "Eval --react" lanza paso 4 del bucle. +6. **Proposals inbox** — Lista pending de `registry.proposals` originadas por assertions fallidas. 
+ +### Estructura + +``` +odr_console/ + main.cpp # fn::run_app + render() + paneles + views_launcher.cpp # Panel 1 + views_pipelines.cpp # Panel 2 + views_jobs.cpp # Panel 3 + views_datasets.cpp # Panel 4 + views_assertions.cpp # Panel 5 + views_proposals.cpp # Panel 6 + data_registry.cpp # Lee registry.db (FTS5, funcs, types) + data_operations.cpp # CRUD operations.db (relations/executions/entities/assertions) + data_duck.cpp # DuckDB connector + ingest + collectors/ # Mismo schema que enrichers de graph_explorer + api_hn_top/ # MVP: HackerNews top stories via API + manifest.yaml + run.py + migrations/ # operations.db migrations (esquema 5-pasos) + CMakeLists.txt +``` + +### Local files (regla cpp_apps §7) + +- `local_files/odr_console.ini` — settings persistidas +- `local_files/imgui.ini` — layout +- `local_files/odr.duckdb` — DuckDB embebido (datos crudos pequeños) +- `local_files/cache//.{html,json,parquet}` — cache addressable +- `operations.db` queda en el dir del exe (consultable por `fn ops`) + +### Decisiones tomadas + +| Tema | Decision | +|---|---| +| Workers default | 4 (mas que graph_explorer porque crawls esperan red) | +| operations.db | Una unica por la app | +| DuckDB | Embebido (linkar libduckdb), no subprocess | +| Collectors lang inicial | Python (espejo graph_explorer enrichers) | +| Browser | CDP via `cdp-cli` Go (issue 0038) cuando aplica | + +### Decisiones pendientes + +- Refactor jobs system: en paralelo a MVP. Ver issue 0065. +- Schema operations.db: requiere migracion 001 con relations/executions/entities/types_snapshot/assertions/assertion_results (ver `fn_operations/migrations/`). 
diff --git a/collectors/api_hn_top/manifest.yaml b/collectors/api_hn_top/manifest.yaml new file mode 100644 index 0000000..233622b --- /dev/null +++ b/collectors/api_hn_top/manifest.yaml @@ -0,0 +1,11 @@ +id: api_hn_top +name: "HackerNews top stories" +description: "Fetcha las top N stories de la API publica de HackerNews y crea entities en operations.db con metadata.{title,url,score,by,time}. Sin auth, sin rate limit estricto. Util como collector MVP para validar el flow odr_console end-to-end." +applies_to: [] +emits: [HnStory] +relations: [] +uses_functions: + - http_get_json_py_infra +params: + - { name: limit, type: int, default: 30 } + - { name: timeout_s, type: int, default: 15 } diff --git a/collectors/api_hn_top/run.py b/collectors/api_hn_top/run.py new file mode 100644 index 0000000..1ef59a1 --- /dev/null +++ b/collectors/api_hn_top/run.py @@ -0,0 +1,237 @@ +#!/usr/bin/env python3 +"""Collector api_hn_top — issue 0066 MVP. + +Wire protocol (espejo del de graph_explorer enrichers, issue 0026): + - stdin: JSON con `ops_db_path`, `app_dir`, `registry_root`, `params` (limit, timeout_s). + - stderr: lineas `PROGRESS:<p> <stage>` para feedback de UI. + - stdout: una linea JSON al final con resumen `{entities_added, items_total, duration_ms}`. + - exit code 0 = ok, !=0 = error. + +Uso standalone (sin odr_console): + cd projects/online_data_recopilation/apps/odr_console + echo '{"ops_db_path":"operations.db","app_dir":".","params":{"limit":5}}' \ + | python/.venv/bin/python3 collectors/api_hn_top/run.py +""" +from __future__ import annotations + +import json +import os +import sqlite3 +import sys +import time +import uuid +from datetime import datetime, timezone +from pathlib import Path + + +def progress(p: float, stage: str = "") -> None: + sys.stderr.write(f"PROGRESS:{p:.2f} {stage}\n") + sys.stderr.flush() + + +def log(msg: str) -> None: + sys.stderr.write(f"{msg}\n") + sys.stderr.flush() + + +def load_registry_funcs(registry_root: str): + """Importa funciones del registry. 
Prefiere `_vendored/`, fallback a + `<registry_root>/python/functions/` (modo dev).""" + vendored = Path(__file__).parent / "_vendored" + if vendored.is_dir(): + if str(vendored) not in sys.path: + sys.path.insert(0, str(vendored)) + return + if registry_root: + path = Path(registry_root) / "python" / "functions" + if path.is_dir() and str(path) not in sys.path: + sys.path.insert(0, str(path)) + + +def now_utc_iso() -> str: + return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + +def open_ops_db(path: str) -> sqlite3.Connection: + conn = sqlite3.connect(path) + conn.execute("PRAGMA foreign_keys=ON;") + return conn + + +def insert_entity( + conn: sqlite3.Connection, + eid: str, + name: str, + type_ref: str, + metadata: dict, + source: str, +) -> None: + ts = now_utc_iso() + conn.execute( + """INSERT OR REPLACE INTO entities + (id, name, type_ref, status, description, domain, tags, + source, metadata, notes, created_at, updated_at) + VALUES (?, ?, ?, 'active', '', '', '[]', ?, ?, '', ?, ?)""", + (eid, name, type_ref, source, json.dumps(metadata), ts, ts), + ) + + +def insert_execution( + conn: sqlite3.Connection, + pipeline_id: str, + started_at: str, + ended_at: str, + duration_ms: int, + records_in: int, + records_out: int, + status: str, + error: str, + metrics: dict, +) -> None: + ts = now_utc_iso() + conn.execute( + """INSERT INTO executions + (id, pipeline_id, relation_id, status, started_at, ended_at, + duration_ms, records_in, records_out, error, metrics, created_at) + VALUES (?, ?, '', ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + ( + f"e_{uuid.uuid4().hex[:12]}", + pipeline_id, + status, + started_at, + ended_at, + duration_ms, + records_in, + records_out, + error, + json.dumps(metrics), + ts, + ), + ) + + +def main() -> int: + raw = sys.stdin.read() + try: + ctx = json.loads(raw) if raw.strip() else {} + except Exception as e: + log(f"stdin not valid JSON: {e}") + return 2 + + ops_db_path = ctx.get("ops_db_path") or "operations.db" + registry_root = 
ctx.get("registry_root") or os.environ.get("FN_REGISTRY_ROOT", "") + params = ctx.get("params") or {} + limit = int(params.get("limit", 30)) + timeout_s = int(params.get("timeout_s", 15)) + + load_registry_funcs(registry_root) + try: + from infra.http_get_json import http_get_json + except ImportError: + # Fallback: stdlib urllib directo si el registry no esta disponible. + log("registry funcs unavailable; falling back to urllib") + import urllib.request + + def http_get_json(url, headers=None, params=None, timeout=30.0): + req = urllib.request.Request(url, headers={"Accept": "application/json"}) + with urllib.request.urlopen(req, timeout=timeout) as r: + return json.loads(r.read().decode("utf-8")) + + started = time.time() + started_iso = now_utc_iso() + progress(0.05, "fetch top ids") + + try: + ids = http_get_json( + "https://hacker-news.firebaseio.com/v0/topstories.json", + timeout=timeout_s, + ) + if not isinstance(ids, list): + raise RuntimeError(f"expected list, got {type(ids).__name__}") + except Exception as e: + log(f"fetch topstories failed: {e}") + ended_iso = now_utc_iso() + try: + conn = open_ops_db(ops_db_path) + insert_execution( + conn, "api_hn_top", started_iso, ended_iso, + int((time.time() - started) * 1000), 0, 0, "failure", + str(e), {}, + ) + conn.commit() + conn.close() + except Exception: + pass + return 1 + + ids = ids[:limit] + progress(0.2, f"fetch {len(ids)} stories") + + items = [] + n_added = 0 + try: + conn = open_ops_db(ops_db_path) + except Exception as e: + log(f"open ops_db failed: {e}") + return 1 + + for i, sid in enumerate(ids): + try: + story = http_get_json( + f"https://hacker-news.firebaseio.com/v0/item/{sid}.json", + timeout=timeout_s, + ) + except Exception as e: + log(f"item {sid}: {e}") + continue + if not isinstance(story, dict): + continue + + eid = f"hn_{sid}" + title = story.get("title") or "(untitled)" + meta = { + "hn_id": sid, + "title": title, + "url": story.get("url") or "", + "score": story.get("score"), + 
"by": story.get("by"), + "time": story.get("time"), + "type": story.get("type"), + "descendants": story.get("descendants"), + } + try: + insert_entity(conn, eid, title, "HnStory", meta, "api_hn_top") + n_added += 1 + items.append({"id": eid, "title": title}) + except Exception as e: + log(f"insert {eid}: {e}") + continue + + progress(0.2 + 0.7 * (i + 1) / len(ids), f"{i+1}/{len(ids)}") + + ended_iso = now_utc_iso() + duration_ms = int((time.time() - started) * 1000) + + try: + insert_execution( + conn, "api_hn_top", started_iso, ended_iso, duration_ms, + len(ids), n_added, "success", "", + {"limit": limit, "fetched": len(ids), "stored": n_added}, + ) + conn.commit() + finally: + conn.close() + + progress(1.0, "done") + summary = { + "entities_added": n_added, + "items_total": len(ids), + "duration_ms": duration_ms, + } + sys.stdout.write(json.dumps(summary) + "\n") + sys.stdout.flush() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/data_registry.cpp b/data_registry.cpp new file mode 100644 index 0000000..8855499 --- /dev/null +++ b/data_registry.cpp @@ -0,0 +1,110 @@ +#include "data_registry.h" + +#include <sqlite3.h> + +#include <cstdio> +#include <utility> + +namespace { + +std::string col_text(sqlite3_stmt* st, int i) { + const unsigned char* t = sqlite3_column_text(st, i); + return t ? std::string(reinterpret_cast<const char*>(t)) : std::string(); +} + +bool fill_row(sqlite3_stmt* st, RegistryRow& row) { + row.id = col_text(st, 0); + row.name = col_text(st, 1); + row.kind = col_text(st, 2); + row.lang = col_text(st, 3); + row.domain = col_text(st, 4); + row.purity = col_text(st, 5); + row.signature = col_text(st, 6); + row.description = col_text(st, 7); + return true; +} + +// FTS5 escape: el caller pasa texto libre; lo encerramos en comillas dobles +// y duplicamos las internas. No interpretamos operadores FTS5. 
+std::string fts5_quote(const std::string& q) { + std::string out; + out.reserve(q.size() + 4); + out.push_back('"'); + for (char c : q) { + if (c == '"') out += "\"\""; + else out.push_back(c); + } + out.push_back('"'); + return out; +} + +} // namespace + +bool registry_open(OdrRegistry& r, const std::string& db_path) { + if (r.db) sqlite3_close(r.db); + r.db = nullptr; + int rc = sqlite3_open_v2(db_path.c_str(), &r.db, SQLITE_OPEN_READONLY, + nullptr); + if (rc != SQLITE_OK) { + std::fprintf(stderr, "[odr_console] sqlite_open: %s\n", + sqlite3_errmsg(r.db)); + if (r.db) { sqlite3_close(r.db); r.db = nullptr; } + return false; + } + return true; +} + +void registry_close(OdrRegistry& r) { + if (r.db) sqlite3_close(r.db); + r.db = nullptr; +} + +bool registry_list_recent(OdrRegistry& r, int limit, + std::vector<RegistryRow>& out) { + out.clear(); + if (!r.db) return false; + const char* sql = + "SELECT id, name, kind, lang, domain, purity, signature, description " + "FROM functions ORDER BY updated_at DESC LIMIT ?;"; + sqlite3_stmt* st = nullptr; + if (sqlite3_prepare_v2(r.db, sql, -1, &st, nullptr) != SQLITE_OK) { + return false; + } + sqlite3_bind_int(st, 1, limit); + while (sqlite3_step(st) == SQLITE_ROW) { + RegistryRow row; + fill_row(st, row); + out.push_back(std::move(row)); + } + sqlite3_finalize(st); + return true; +} + +bool registry_search(OdrRegistry& r, const std::string& query, int limit, + std::vector<RegistryRow>& out) { + out.clear(); + if (!r.db) return false; + + std::string match = fts5_quote(query); + const char* sql = + "SELECT f.id, f.name, f.kind, f.lang, f.domain, f.purity, " + " f.signature, f.description " + "FROM functions f " + "WHERE f.id IN (SELECT id FROM functions_fts WHERE functions_fts MATCH ?) 
" + "ORDER BY f.name LIMIT ?;"; + sqlite3_stmt* st = nullptr; + if (sqlite3_prepare_v2(r.db, sql, -1, &st, nullptr) != SQLITE_OK) { + std::fprintf(stderr, "[odr_console] prepare_v2 search: %s\n", + sqlite3_errmsg(r.db)); + return false; + } + sqlite3_bind_text(st, 1, match.c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_int(st, 2, limit); + while (sqlite3_step(st) == SQLITE_ROW) { + RegistryRow row; + fill_row(st, row); + out.push_back(std::move(row)); + } + sqlite3_finalize(st); + return true; +} diff --git a/data_registry.h b/data_registry.h new file mode 100644 index 0000000..9ad2df7 --- /dev/null +++ b/data_registry.h @@ -0,0 +1,32 @@ +#pragma once + +#include <string> +#include <vector> + +struct sqlite3; + +struct OdrRegistry { + sqlite3* db = nullptr; +}; + +struct RegistryRow { + std::string id; + std::string name; + std::string kind; // function | pipeline | component + std::string lang; // go | py | bash | ts | cpp + std::string domain; + std::string purity; // pure | impure + std::string signature; + std::string description; +}; + +bool registry_open(OdrRegistry& r, const std::string& db_path); +void registry_close(OdrRegistry& r); + +// Lista las N funciones mas recientes (ORDER BY updated_at DESC). +bool registry_list_recent(OdrRegistry& r, int limit, + std::vector<RegistryRow>& out); + +// FTS5 search sobre name + description + tags + signature + code. +bool registry_search(OdrRegistry& r, const std::string& query, int limit, + std::vector<RegistryRow>& out); diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..6e9b703 --- /dev/null +++ b/main.cpp @@ -0,0 +1,185 @@ +// odr_console — lanzador GUI de funciones del registry para recolectar datos online. +// MVP: panel launcher + placeholders. Ver issue 0066. 
+ +#include "app_base.h" +#include "imgui.h" +#include "core/app_menubar.h" +#include "core/app_about.h" +#include "core/app_settings.h" +#include "core/icon_font.h" +#include "core/icons_tabler.h" +#include "core/tokens.h" +#include "core/logger.h" + +#include "data_registry.h" + +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> + +static OdrRegistry g_registry; +static std::string g_db_path; +static char g_search_buf[256] = ""; +static std::vector<RegistryRow> g_results; +static int g_selected = -1; + +static void do_search() { + g_results.clear(); + g_selected = -1; + if (g_search_buf[0] == '\0') { + registry_list_recent(g_registry, 50, g_results); + } else { + registry_search(g_registry, g_search_buf, 50, g_results); + } +} + +static bool g_show_launcher = true; +static bool g_show_jobs = true; +static bool g_show_datasets = true; + +static void draw_launcher() { + if (!g_show_launcher) return; + if (!ImGui::Begin(TI_SEARCH " Launcher", &g_show_launcher)) { + ImGui::End(); + return; + } + + ImGui::PushItemWidth(-1); + if (ImGui::InputTextWithHint("##search", "Search functions/pipelines (FTS5)...", + g_search_buf, sizeof(g_search_buf), + ImGuiInputTextFlags_EnterReturnsTrue)) { + do_search(); + } + ImGui::PopItemWidth(); + + if (ImGui::Button("Search")) do_search(); + ImGui::SameLine(); + ImGui::TextDisabled("%zu hits", g_results.size()); + + ImGui::Separator(); + + if (ImGui::BeginTable("##results", 4, + ImGuiTableFlags_RowBg | ImGuiTableFlags_Borders | + ImGuiTableFlags_ScrollY | ImGuiTableFlags_Resizable)) { + ImGui::TableSetupColumn("ID", ImGuiTableColumnFlags_WidthStretch); + ImGui::TableSetupColumn("Kind", ImGuiTableColumnFlags_WidthFixed, 80); + ImGui::TableSetupColumn("Domain", ImGuiTableColumnFlags_WidthFixed, 100); + ImGui::TableSetupColumn("Description", ImGuiTableColumnFlags_WidthStretch); + ImGui::TableHeadersRow(); + + for (int i = 0; i < (int)g_results.size(); ++i) { + const auto& r = g_results[i]; + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + 
bool sel = (i == g_selected); + if (ImGui::Selectable(r.id.c_str(), sel, + ImGuiSelectableFlags_SpanAllColumns)) { + g_selected = i; + } + ImGui::TableSetColumnIndex(1); + ImGui::TextUnformatted(r.kind.c_str()); + ImGui::TableSetColumnIndex(2); + ImGui::TextUnformatted(r.domain.c_str()); + ImGui::TableSetColumnIndex(3); + ImGui::TextUnformatted(r.description.c_str()); + } + ImGui::EndTable(); + } + + if (g_selected >= 0 && g_selected < (int)g_results.size()) { + ImGui::Separator(); + const auto& r = g_results[g_selected]; + ImGui::Text("Selected: %s", r.id.c_str()); + ImGui::TextWrapped("Signature: %s", r.signature.c_str()); + ImGui::Spacing(); + ImGui::BeginDisabled(true); + ImGui::Button(TI_PLAYER_PLAY " Run"); + ImGui::EndDisabled(); + ImGui::SameLine(); + ImGui::TextDisabled("(jobs system pending — issue 0065)"); + } + + ImGui::End(); +} + +static void draw_jobs() { + if (!g_show_jobs) return; + if (!ImGui::Begin(TI_LIST " Jobs", &g_show_jobs)) { + ImGui::End(); + return; + } + ImGui::TextDisabled("Jobs queue panel — pendiente issue 0065"); + ImGui::TextWrapped( + "Cuando jobs_pool_cpp_core este extraido del graph_explorer al " + "registry, este panel mostrara cola/running/done con live progress. " + "Ver dev/issues/0067-odr-osint-prereqs-roadmap.md"); + ImGui::End(); +} + +static void draw_datasets() { + if (!g_show_datasets) return; + if (!ImGui::Begin(TI_DATABASE " Datasets", &g_show_datasets)) { + ImGui::End(); + return; + } + ImGui::TextDisabled("DuckDB browser — pendiente fase 2 del MVP"); + ImGui::End(); +} + +static void render() { + static fn_ui::PanelToggle panels[] = { + { "Launcher", nullptr, &g_show_launcher }, + { "Jobs", nullptr, &g_show_jobs }, + { "Datasets", nullptr, &g_show_datasets }, + }; + fn_ui::app_menubar(panels, + sizeof(panels) / sizeof(panels[0]), + nullptr); + draw_launcher(); + draw_jobs(); + draw_datasets(); +} + +int main(int argc, char** argv) { + // CLI: opcional, primer arg = path a registry.db (override). 
+ if (argc >= 2) { + g_db_path = argv[1]; + } + if (g_db_path.empty()) { + const char* env = std::getenv("FN_REGISTRY_DB"); + if (env && *env) g_db_path = env; + } + if (g_db_path.empty()) { + const char* root = std::getenv("FN_REGISTRY_ROOT"); + if (root && *root) { + g_db_path = std::string(root) + "/registry.db"; + } + } + + if (g_db_path.empty()) { + std::fprintf(stderr, + "[odr_console] No registry.db path. Pass as arg or set " + "FN_REGISTRY_DB / FN_REGISTRY_ROOT.\n"); + } else if (!registry_open(g_registry, g_db_path)) { + std::fprintf(stderr, + "[odr_console] Failed to open registry.db: %s\n", + g_db_path.c_str()); + } else { + registry_list_recent(g_registry, 50, g_results); + } + + fn::AppConfig cfg; + cfg.title = "odr_console — online data recopilation"; + cfg.width = 1400; + cfg.height = 900; + cfg.about = { "odr_console", "0.1.0", + "Lanzador GUI de funciones del registry para recolectar " + "datos online (APIs, scraping, browser CDP). MVP." }; + cfg.log = { "odr_console.log", 1 }; + + int rc = fn::run_app(cfg, render); + registry_close(g_registry); + return rc; +} diff --git a/operations.db b/operations.db new file mode 100644 index 0000000..69becc5 Binary files /dev/null and b/operations.db differ