chore: sync from fn-registry agent

This commit is contained in:
fn-registry agent
2026-05-09 18:11:22 +02:00
commit f7923de9ff
8 changed files with 678 additions and 0 deletions
+34
View File
@@ -0,0 +1,34 @@
cmake_minimum_required(VERSION 3.20)

# odr_console — GUI launcher + 5-step reactive loop. Issue 0066.
# Stack: ImGui + SQLite (registry.db opened read-only). DuckDB and jobs_pool
# are planned for later phases.

# SQLite3: prefer a system package or a `sqlite3_vendored` target already
# created by the parent project; otherwise build the vendored amalgamation.
find_package(SQLite3 QUIET)
if(NOT SQLite3_FOUND AND NOT TARGET sqlite3_vendored)
    set(SQLITE3_AMALG_DIR ${CMAKE_SOURCE_DIR}/vendor/sqlite3)
    add_library(sqlite3_vendored STATIC ${SQLITE3_AMALG_DIR}/sqlite3.c)
    target_include_directories(sqlite3_vendored PUBLIC ${SQLITE3_AMALG_DIR})
    target_compile_definitions(sqlite3_vendored PRIVATE
        SQLITE_THREADSAFE=1
        SQLITE_ENABLE_FTS5
        SQLITE_ENABLE_JSON1
    )
endif()

# Create the namespaced alias whenever it is missing. The parent project may
# have defined `sqlite3_vendored` without it, in which case linking against
# SQLite::SQLite3 below would fail at configure time.
if(NOT TARGET SQLite::SQLite3 AND TARGET sqlite3_vendored)
    add_library(SQLite::SQLite3 ALIAS sqlite3_vendored)
endif()

add_imgui_app(odr_console
    main.cpp
    data_registry.cpp
)

target_include_directories(odr_console PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}
)
# Only expose the vendored headers when the system package is not in use;
# otherwise <sqlite3.h> from vendor/ could shadow the header that matches the
# system library actually being linked.
if(NOT SQLite3_FOUND)
    target_include_directories(odr_console PRIVATE
        ${CMAKE_SOURCE_DIR}/vendor/sqlite3
    )
endif()

target_link_libraries(odr_console PRIVATE SQLite::SQLite3)

if(WIN32)
    set_target_properties(odr_console PROPERTIES WIN32_EXECUTABLE TRUE)
endif()
+69
View File
@@ -0,0 +1,69 @@
---
name: odr_console
lang: cpp
domain: tools
description: "Lanzador GUI de funciones del registry para recolectar datos online. Panel de busqueda FTS5, jobs queue async (workers concurrentes), pipeline builder DAG, browser DuckDB, assertions/proposals. Aplica bucle reactivo de 5 pasos sobre operations.db propia."
tags: [imgui, data-collection, scraping, duckdb, jobs, cdp]
uses_functions: []
uses_types: []
framework: "imgui"
entry_point: "main.cpp"
dir_path: "projects/online_data_recopilation/apps/odr_console"
repo_url: ""
---
## Notas
App C++ ImGui que orquesta:
1. **Launcher panel** — busqueda FTS5 sobre `registry.db`. Lanza cualquier funcion/pipeline con form auto-generado desde `params_schema`.
2. **Pipeline builder** — DAG visual con `imgui_node_editor`. Compone collectors validando composabilidad (`returns` → `uses_types`). Persiste en `operations.relations` con status `designed`.
3. **Jobs queue** — Pool de N workers (default 4). Cada job = subprocess Python collector. Live progress por panel (`PROGRESS:` en stderr). Reusa `jobs_pool_cpp_core` (extraido de osint_graph en issue 0065).
4. **Datasets browser** — Panel DuckDB embebido. Query editor + tabla preview + ImPlot para charts. Lee parquet de `vaults/odr_data/`.
5. **Entities + assertions** — Vista de `operations.entities` por dataset. Editor SQL para assertions. Boton "Eval --react" lanza paso 4 del bucle.
6. **Proposals inbox** — Lista pending de `registry.proposals` originadas por assertions fallidas.
### Estructura
```
odr_console/
main.cpp # fn::run_app + render() + paneles
views_launcher.cpp # Panel 1
views_pipelines.cpp # Panel 2
views_jobs.cpp # Panel 3
views_datasets.cpp # Panel 4
views_assertions.cpp # Panel 5
views_proposals.cpp # Panel 6
data_registry.cpp # Lee registry.db (FTS5, funcs, types)
data_operations.cpp # CRUD operations.db (relations/executions/entities/assertions)
data_duck.cpp # DuckDB connector + ingest
collectors/ # Mismo schema que enrichers de graph_explorer
api_hn_top/ # MVP: HackerNews top stories via API
manifest.yaml
run.py
migrations/ # operations.db migrations (esquema 5-pasos)
CMakeLists.txt
```
### Local files (regla cpp_apps §7)
- `local_files/odr_console.ini` — settings persistidas
- `local_files/imgui.ini` — layout
- `local_files/odr.duckdb` — DuckDB embebido (datos crudos pequeños)
- `local_files/cache/<sha[0:2]>/<sha>.{html,json,parquet}` — cache addressable
- `operations.db` queda en el dir del exe (consultable por `fn ops`)
### Decisiones tomadas
| Tema | Decision |
|---|---|
| Workers default | 4 (mas que graph_explorer porque crawls esperan red) |
| operations.db | Una unica por la app |
| DuckDB | Embebido (linkar libduckdb), no subprocess |
| Collectors lang inicial | Python (espejo graph_explorer enrichers) |
| Browser | CDP via `cdp-cli` Go (issue 0038) cuando aplica |
### Decisiones pendientes
- Refactor jobs system: en paralelo a MVP. Ver issue 0065.
- Schema operations.db: requiere migracion 001 con relations/executions/entities/types_snapshot/assertions/assertion_results (ver `fn_operations/migrations/`).
+11
View File
@@ -0,0 +1,11 @@
id: api_hn_top
name: "HackerNews top stories"
description: "Fetcha las top N stories de la API publica de HackerNews y crea entities en operations.db con metadata.{title,url,score,by,time}. Sin auth, sin rate limit estricto. Util como collector MVP para validar el flow odr_console end-to-end."
applies_to: []
emits: [HnStory]
relations: []
uses_functions:
- http_get_json_py_infra
params:
- { name: limit, type: int, default: 30 }
- { name: timeout_s, type: int, default: 15 }
+237
View File
@@ -0,0 +1,237 @@
#!/usr/bin/env python3
"""Collector api_hn_top — issue 0066 MVP.
Wire protocol (espejo del de graph_explorer enrichers, issue 0026):
- stdin: JSON con `ops_db_path`, `app_dir`, `registry_root`, `params` (limit, timeout_s).
- stderr: lineas `PROGRESS:<float> <stage>` para feedback de UI.
- stdout: una linea JSON al final con resumen `{entities_added, items_total, duration_ms}`.
- exit code 0 = ok, !=0 = error.
Uso standalone (sin odr_console):
cd projects/online_data_recopilation/apps/odr_console
echo '{"ops_db_path":"operations.db","app_dir":".","params":{"limit":5}}' \
| python/.venv/bin/python3 collectors/api_hn_top/run.py
"""
from __future__ import annotations
import json
import os
import sqlite3
import sys
import time
import uuid
from datetime import datetime, timezone
from pathlib import Path
def progress(p: float, stage: str = "") -> None:
    """Report progress to the UI over stderr.

    Emits a single line of the form ``PROGRESS:<p with 2 decimals> <stage>``
    and flushes immediately so the parent process sees it right away.
    """
    print(f"PROGRESS:{p:.2f} {stage}", file=sys.stderr, flush=True)
def log(msg: str) -> None:
    """Write `msg` plus a trailing newline to stderr and flush it."""
    line = f"{msg}\n"
    stream = sys.stderr
    stream.write(line)
    stream.flush()
def load_registry_funcs(registry_root: str):
    """Make the registry's Python functions importable.

    A `_vendored/` directory next to this script takes precedence; when it
    exists nothing else is considered. Otherwise, if `registry_root` is
    non-empty, `<registry_root>/python/functions/` is used (dev mode).
    The chosen directory is put at the front of `sys.path` unless it is
    already listed there.
    """

    def _prepend(directory: Path) -> None:
        entry = str(directory)
        if entry not in sys.path:
            sys.path.insert(0, entry)

    bundled = Path(__file__).parent / "_vendored"
    if bundled.is_dir():
        _prepend(bundled)
        return

    if not registry_root:
        return

    dev_dir = Path(registry_root) / "python" / "functions"
    if dev_dir.is_dir():
        _prepend(dev_dir)
def now_utc_iso() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    current = datetime.now(tz=timezone.utc)
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")
def open_ops_db(path: str) -> sqlite3.Connection:
    """Open the SQLite database at `path` and enable foreign-key enforcement.

    The caller owns the returned connection and is responsible for closing it.
    """
    db = sqlite3.connect(path)
    db.execute("PRAGMA foreign_keys=ON;")
    return db
def insert_entity(
    conn: sqlite3.Connection,
    eid: str,
    name: str,
    type_ref: str,
    metadata: dict,
    source: str,
) -> None:
    """Insert (or replace) one row in the `entities` table.

    `metadata` is stored as a JSON string. `status` is fixed to 'active',
    `tags` to '[]', and description/domain/notes to empty strings. Both
    `created_at` and `updated_at` receive the current UTC timestamp, so a
    replaced row gets a fresh `created_at` as well. The statement is not
    committed here; the caller commits.
    """
    stamp = now_utc_iso()
    row = (eid, name, type_ref, source, json.dumps(metadata), stamp, stamp)
    sql = """INSERT OR REPLACE INTO entities
(id, name, type_ref, status, description, domain, tags,
source, metadata, notes, created_at, updated_at)
VALUES (?, ?, ?, 'active', '', '', '[]', ?, ?, '', ?, ?)"""
    conn.execute(sql, row)
def insert_execution(
    conn: sqlite3.Connection,
    pipeline_id: str,
    started_at: str,
    ended_at: str,
    duration_ms: int,
    records_in: int,
    records_out: int,
    status: str,
    error: str,
    metrics: dict,
) -> None:
    """Append one run record to the `executions` table.

    The row id is `e_` followed by 12 hex characters of a random UUID.
    `relation_id` is written as an empty string and `metrics` is serialized
    to JSON. `created_at` is the current UTC timestamp. Nothing is committed
    here; the caller commits.
    """
    created_at = now_utc_iso()
    execution_id = f"e_{uuid.uuid4().hex[:12]}"
    values = (
        execution_id,
        pipeline_id,
        status,
        started_at,
        ended_at,
        duration_ms,
        records_in,
        records_out,
        error,
        json.dumps(metrics),
        created_at,
    )
    sql = """INSERT INTO executions
(id, pipeline_id, relation_id, status, started_at, ended_at,
duration_ms, records_in, records_out, error, metrics, created_at)
VALUES (?, ?, '', ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
    conn.execute(sql, values)
def main() -> int:
    """Run the collector once and return the process exit code.

    Reads the job context as JSON from stdin, fetches the HackerNews top
    story ids, stores each story as an `HnStory` entity in the operations
    database and records one row in `executions`. Progress lines go to
    stderr; a one-line JSON summary goes to stdout on success.

    Returns:
        0 on success, 1 on a fetch or database failure, 2 when stdin or the
        `params` values are invalid.
    """
    raw = sys.stdin.read()
    try:
        ctx = json.loads(raw) if raw.strip() else {}
    except Exception as e:
        log(f"stdin not valid JSON: {e}")
        return 2
    if not isinstance(ctx, dict):
        log(f"stdin JSON must be an object, got {type(ctx).__name__}")
        return 2

    ops_db_path = ctx.get("ops_db_path") or "operations.db"
    registry_root = ctx.get("registry_root") or os.environ.get("FN_REGISTRY_ROOT", "")
    params = ctx.get("params") or {}
    try:
        # Clamp to zero: a negative limit would otherwise slice from the END
        # of the id list (ids[:-n]) and fetch almost every story.
        limit = max(0, int(params.get("limit", 30)))
        timeout_s = int(params.get("timeout_s", 15))
    except (AttributeError, TypeError, ValueError) as e:
        log(f"invalid params: {e}")
        return 2

    load_registry_funcs(registry_root)
    try:
        from infra.http_get_json import http_get_json
    except ImportError:
        # Fallback: plain stdlib urllib when the registry is not available.
        log("registry funcs unavailable; falling back to urllib")
        import urllib.request

        def http_get_json(url, headers=None, params=None, timeout=30.0):
            req = urllib.request.Request(url, headers={"Accept": "application/json"})
            with urllib.request.urlopen(req, timeout=timeout) as r:
                return json.loads(r.read().decode("utf-8"))

    started = time.time()
    started_iso = now_utc_iso()
    progress(0.05, "fetch top ids")
    try:
        ids = http_get_json(
            "https://hacker-news.firebaseio.com/v0/topstories.json",
            timeout=timeout_s,
        )
        if not isinstance(ids, list):
            raise RuntimeError(f"expected list, got {type(ids).__name__}")
    except Exception as e:
        log(f"fetch topstories failed: {e}")
        ended_iso = now_utc_iso()
        # Best effort: record the failed run, but never let a bookkeeping
        # problem hide the original fetch error or change the exit code.
        try:
            conn = open_ops_db(ops_db_path)
            try:
                insert_execution(
                    conn, "api_hn_top", started_iso, ended_iso,
                    int((time.time() - started) * 1000), 0, 0, "failure",
                    str(e), {},
                )
                conn.commit()
            finally:
                conn.close()
        except Exception as db_err:
            log(f"could not record failed execution: {db_err}")
        return 1

    ids = ids[:limit]
    progress(0.2, f"fetch {len(ids)} stories")
    n_added = 0
    try:
        conn = open_ops_db(ops_db_path)
    except Exception as e:
        log(f"open ops_db failed: {e}")
        return 1

    for i, sid in enumerate(ids):
        story = None
        try:
            story = http_get_json(
                f"https://hacker-news.firebaseio.com/v0/item/{sid}.json",
                timeout=timeout_s,
            )
        except Exception as e:
            log(f"item {sid}: {e}")

        if isinstance(story, dict):
            eid = f"hn_{sid}"
            title = story.get("title") or "(untitled)"
            meta = {
                "hn_id": sid,
                "title": title,
                "url": story.get("url") or "",
                "score": story.get("score"),
                "by": story.get("by"),
                "time": story.get("time"),
                "type": story.get("type"),
                "descendants": story.get("descendants"),
            }
            try:
                insert_entity(conn, eid, title, "HnStory", meta, "api_hn_top")
                n_added += 1
            except Exception as e:
                log(f"insert {eid}: {e}")

        # Report progress for skipped items too, so the UI bar keeps moving.
        progress(0.2 + 0.7 * (i + 1) / len(ids), f"{i+1}/{len(ids)}")

    ended_iso = now_utc_iso()
    duration_ms = int((time.time() - started) * 1000)
    try:
        insert_execution(
            conn, "api_hn_top", started_iso, ended_iso, duration_ms,
            len(ids), n_added, "success", "",
            {"limit": limit, "fetched": len(ids), "stored": n_added},
        )
        # Single commit: entities and the execution row land together.
        conn.commit()
    except Exception as e:
        log(f"record execution failed: {e}")
        return 1
    finally:
        conn.close()

    progress(1.0, "done")
    summary = {
        "entities_added": n_added,
        "items_total": len(ids),
        "duration_ms": duration_ms,
    }
    sys.stdout.write(json.dumps(summary) + "\n")
    sys.stdout.flush()
    return 0


if __name__ == "__main__":
    sys.exit(main())
+110
View File
@@ -0,0 +1,110 @@
#include "data_registry.h"
#include <sqlite3.h>
#include <cstdio>
#include <cstring>
namespace {
// Returns column `i` of the current row of `st` as a std::string. A null
// pointer from sqlite3_column_text yields an empty string.
std::string col_text(sqlite3_stmt* st, int i) {
    const auto* raw = reinterpret_cast<const char*>(sqlite3_column_text(st, i));
    if (raw == nullptr) {
        return std::string();
    }
    return std::string(raw);
}
// Copies columns 0..7 of the current row of `st` into `row`, in the order
// id, name, kind, lang, domain, purity, signature, description (the order used
// by the SELECT lists in this file). Always returns true.
bool fill_row(sqlite3_stmt* st, RegistryRow& row) {
    std::string RegistryRow::* const columns[] = {
        &RegistryRow::id,
        &RegistryRow::name,
        &RegistryRow::kind,
        &RegistryRow::lang,
        &RegistryRow::domain,
        &RegistryRow::purity,
        &RegistryRow::signature,
        &RegistryRow::description,
    };
    int index = 0;
    for (auto member : columns) {
        row.*member = col_text(st, index);
        ++index;
    }
    return true;
}
// FTS5 escaping: the caller passes free text; it is wrapped in double quotes
// and every embedded double quote is doubled, so the whole input is handed to
// FTS5 as one quoted string and its operators are not interpreted.
std::string fts5_quote(const std::string& q) {
    std::string quoted;
    quoted.reserve(q.size() + 4);
    quoted += '"';
    for (std::string::size_type pos = 0; pos < q.size(); ++pos) {
        const char ch = q[pos];
        // A quote is emitted twice, everything else once.
        quoted.append(ch == '"' ? 2u : 1u, ch);
    }
    quoted += '"';
    return quoted;
}
} // namespace
// Opens `db_path` read-only and stores the handle in `r.db`, closing any handle
// `r` already held. On failure the SQLite error message is printed to stderr,
// `r.db` is left null and false is returned.
bool registry_open(OdrRegistry& r, const std::string& db_path) {
    if (r.db != nullptr) {
        sqlite3_close(r.db);
        r.db = nullptr;
    }

    const int rc =
        sqlite3_open_v2(db_path.c_str(), &r.db, SQLITE_OPEN_READONLY, nullptr);
    if (rc == SQLITE_OK) {
        return true;
    }

    std::fprintf(stderr, "[odr_console] sqlite_open: %s\n",
                 sqlite3_errmsg(r.db));
    // sqlite3_open_v2 may hand back a handle even when it fails; release it.
    if (r.db != nullptr) {
        sqlite3_close(r.db);
        r.db = nullptr;
    }
    return false;
}
// Closes the handle held by `r` (if any) and resets `r.db` to null. Calling it
// on an already-closed registry does nothing.
void registry_close(OdrRegistry& r) {
    sqlite3* const handle = r.db;
    r.db = nullptr;
    if (handle != nullptr) {
        sqlite3_close(handle);
    }
}
// Fills `out` with up to `limit` rows of `functions`, newest `updated_at`
// first. `out` is cleared before anything else happens.
//
// Returns false when the registry is not open, when the statement cannot be
// prepared, or when stepping ends with anything other than SQLITE_DONE; in the
// last case `out` keeps the rows read before the error. Failures are reported
// on stderr.
// NOTE(review): `limit` is bound as-is; SQLite treats a negative LIMIT as
// "no limit" — confirm callers never pass one.
bool registry_list_recent(OdrRegistry& r, int limit,
                          std::vector<RegistryRow>& out) {
    out.clear();
    if (!r.db) return false;

    const char* sql =
        "SELECT id, name, kind, lang, domain, purity, signature, description "
        "FROM functions ORDER BY updated_at DESC LIMIT ?;";
    sqlite3_stmt* st = nullptr;
    if (sqlite3_prepare_v2(r.db, sql, -1, &st, nullptr) != SQLITE_OK) {
        std::fprintf(stderr, "[odr_console] prepare_v2 list_recent: %s\n",
                     sqlite3_errmsg(r.db));
        return false;
    }
    sqlite3_bind_int(st, 1, limit);

    int rc = SQLITE_OK;
    while ((rc = sqlite3_step(st)) == SQLITE_ROW) {
        RegistryRow row;
        fill_row(st, row);
        out.push_back(std::move(row));
    }

    // A loop that ends on anything but SQLITE_DONE stopped on an error
    // (busy, corrupt, ...); report it instead of pretending success.
    const bool ok = (rc == SQLITE_DONE);
    if (!ok) {
        std::fprintf(stderr, "[odr_console] step list_recent: %s\n",
                     sqlite3_errmsg(r.db));
    }
    sqlite3_finalize(st);
    return ok;
}
// Full-text search: fills `out` with up to `limit` rows of `functions` whose id
// matches `query` in `functions_fts`, ordered by name. `query` is treated as
// literal text (quoted via fts5_quote). `out` is cleared first.
//
// Returns false when the registry is not open, when the statement cannot be
// prepared, or when stepping ends with anything other than SQLITE_DONE — FTS5
// reports MATCH errors at step time, not at prepare time. In that last case
// `out` keeps the rows read before the error. Failures are reported on stderr.
bool registry_search(OdrRegistry& r, const std::string& query, int limit,
                     std::vector<RegistryRow>& out) {
    out.clear();
    if (!r.db) return false;

    const std::string match = fts5_quote(query);
    const char* sql =
        "SELECT f.id, f.name, f.kind, f.lang, f.domain, f.purity, "
        " f.signature, f.description "
        "FROM functions f "
        "WHERE f.id IN (SELECT id FROM functions_fts WHERE functions_fts MATCH ?) "
        "ORDER BY f.name LIMIT ?;";
    sqlite3_stmt* st = nullptr;
    if (sqlite3_prepare_v2(r.db, sql, -1, &st, nullptr) != SQLITE_OK) {
        std::fprintf(stderr, "[odr_console] prepare_v2 search: %s\n",
                     sqlite3_errmsg(r.db));
        return false;
    }
    // SQLITE_TRANSIENT: SQLite takes its own copy of the bound text.
    sqlite3_bind_text(st, 1, match.c_str(), -1, SQLITE_TRANSIENT);
    sqlite3_bind_int(st, 2, limit);

    int rc = SQLITE_OK;
    while ((rc = sqlite3_step(st)) == SQLITE_ROW) {
        RegistryRow row;
        fill_row(st, row);
        out.push_back(std::move(row));
    }

    const bool ok = (rc == SQLITE_DONE);
    if (!ok) {
        std::fprintf(stderr, "[odr_console] step search: %s\n",
                     sqlite3_errmsg(r.db));
    }
    sqlite3_finalize(st);
    return ok;
}
+32
View File
@@ -0,0 +1,32 @@
#pragma once
#include <string>
#include <vector>
struct sqlite3;
// Holder for the registry database handle. `db` is null until registry_open
// succeeds and is reset to null by registry_close.
// The struct has no destructor: call registry_close to release the handle.
struct OdrRegistry {
sqlite3* db = nullptr;
};
// One row of the registry's `functions` table, as produced by
// registry_list_recent and registry_search. All fields are text; a NULL
// column is stored as an empty string.
struct RegistryRow {
std::string id;
std::string name;
std::string kind; // function | pipeline | component
std::string lang; // go | py | bash | ts | cpp
std::string domain;
std::string purity; // pure | impure
std::string signature;
std::string description;
};
// Opens the database at `db_path` read-only into `r`, closing any handle `r`
// already holds. Returns false (and leaves `r.db` null) on failure.
bool registry_open(OdrRegistry& r, const std::string& db_path);
// Closes the handle held by `r`, if any, and resets it to null.
void registry_close(OdrRegistry& r);
// Lists the N most recent functions (ORDER BY updated_at DESC) into `out`.
// `out` is cleared first. Returns false if the registry is not open or the
// query cannot be prepared.
bool registry_list_recent(OdrRegistry& r, int limit,
std::vector<RegistryRow>& out);
// FTS5 search via `functions_fts`; results are ordered by name and written to
// `out` (cleared first). `query` is quoted, so FTS5 operators in it are not
// interpreted.
// NOTE(review): the original note says the index covers name + description +
// tags + signature + code; the FTS table definition is not visible here —
// confirm against the registry schema.
bool registry_search(OdrRegistry& r, const std::string& query, int limit,
std::vector<RegistryRow>& out);
+185
View File
@@ -0,0 +1,185 @@
// odr_console — lanzador GUI de funciones del registry para recolectar datos online.
// MVP: panel launcher + placeholders. Ver issue 0066.
#include "app_base.h"
#include "imgui.h"
#include "core/app_menubar.h"
#include "core/app_about.h"
#include "core/app_settings.h"
#include "core/icon_font.h"
#include "core/icons_tabler.h"
#include "core/tokens.h"
#include "core/logger.h"
#include "data_registry.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
// File-level state of the launcher panel.
// Registry connection opened in main() and closed before exit.
static OdrRegistry g_registry;
// Path to registry.db, resolved in main() from argv or the environment.
static std::string g_db_path;
// Backing buffer for the search input box (NUL-terminated).
static char g_search_buf[256] = "";
// Rows currently shown in the results table.
static std::vector<RegistryRow> g_results;
// Index into g_results of the selected row, or -1 when nothing is selected.
static int g_selected = -1;
// Refreshes g_results from the registry and drops the current selection.
// An empty search box lists the 50 most recent functions; otherwise the box
// content is run as a full-text search capped at 50 hits.
static void do_search() {
    g_selected = -1;
    g_results.clear();

    const bool has_query = (g_search_buf[0] != '\0');
    if (has_query) {
        registry_search(g_registry, g_search_buf, 50, g_results);
        return;
    }
    registry_list_recent(g_registry, 50, g_results);
}
// Visibility flags for the three panels; toggled from the menubar entries set
// up in render() and by each window's close button.
static bool g_show_launcher = true;
static bool g_show_jobs = true;
static bool g_show_datasets = true;
static void draw_launcher() {
if (!g_show_launcher) return;
if (!ImGui::Begin(TI_SEARCH " Launcher", &g_show_launcher)) {
ImGui::End();
return;
}
ImGui::PushItemWidth(-1);
if (ImGui::InputTextWithHint("##search", "Search functions/pipelines (FTS5)...",
g_search_buf, sizeof(g_search_buf),
ImGuiInputTextFlags_EnterReturnsTrue)) {
do_search();
}
ImGui::PopItemWidth();
if (ImGui::Button("Search")) do_search();
ImGui::SameLine();
ImGui::TextDisabled("%zu hits", g_results.size());
ImGui::Separator();
if (ImGui::BeginTable("##results", 4,
ImGuiTableFlags_RowBg | ImGuiTableFlags_Borders |
ImGuiTableFlags_ScrollY | ImGuiTableFlags_Resizable)) {
ImGui::TableSetupColumn("ID", ImGuiTableColumnFlags_WidthStretch);
ImGui::TableSetupColumn("Kind", ImGuiTableColumnFlags_WidthFixed, 80);
ImGui::TableSetupColumn("Domain", ImGuiTableColumnFlags_WidthFixed, 100);
ImGui::TableSetupColumn("Description", ImGuiTableColumnFlags_WidthStretch);
ImGui::TableHeadersRow();
for (int i = 0; i < (int)g_results.size(); ++i) {
const auto& r = g_results[i];
ImGui::TableNextRow();
ImGui::TableSetColumnIndex(0);
bool sel = (i == g_selected);
if (ImGui::Selectable(r.id.c_str(), sel,
ImGuiSelectableFlags_SpanAllColumns)) {
g_selected = i;
}
ImGui::TableSetColumnIndex(1);
ImGui::TextUnformatted(r.kind.c_str());
ImGui::TableSetColumnIndex(2);
ImGui::TextUnformatted(r.domain.c_str());
ImGui::TableSetColumnIndex(3);
ImGui::TextUnformatted(r.description.c_str());
}
ImGui::EndTable();
}
if (g_selected >= 0 && g_selected < (int)g_results.size()) {
ImGui::Separator();
const auto& r = g_results[g_selected];
ImGui::Text("Selected: %s", r.id.c_str());
ImGui::TextWrapped("Signature: %s", r.signature.c_str());
ImGui::Spacing();
ImGui::BeginDisabled(true);
ImGui::Button(TI_PLAYER_PLAY " Run");
ImGui::EndDisabled();
ImGui::SameLine();
ImGui::TextDisabled("(jobs system pending — issue 0065)");
}
ImGui::End();
}
// Draws the Jobs window. Placeholder: it only shows explanatory text.
static void draw_jobs() {
    if (!g_show_jobs) return;

    // ImGui::End must be called whether or not Begin reports the window visible.
    if (ImGui::Begin(TI_LIST " Jobs", &g_show_jobs)) {
        ImGui::TextDisabled("Jobs queue panel — pendiente issue 0065");
        ImGui::TextWrapped(
            "Cuando jobs_pool_cpp_core este extraido del graph_explorer al "
            "registry, este panel mostrara cola/running/done con live progress. "
            "Ver dev/issues/0067-odr-osint-prereqs-roadmap.md");
    }
    ImGui::End();
}
// Draws the Datasets window. Placeholder: it only shows a pending notice.
static void draw_datasets() {
    if (!g_show_datasets) return;

    // ImGui::End must be called whether or not Begin reports the window visible.
    if (ImGui::Begin(TI_DATABASE " Datasets", &g_show_datasets)) {
        ImGui::TextDisabled("DuckDB browser — pendiente fase 2 del MVP");
    }
    ImGui::End();
}
// Per-frame callback handed to fn::run_app: draws the menubar with one toggle
// per panel, then the three panels in a fixed order.
static void render() {
    static fn_ui::PanelToggle panels[] = {
        { "Launcher", nullptr, &g_show_launcher },
        { "Jobs",     nullptr, &g_show_jobs },
        { "Datasets", nullptr, &g_show_datasets },
    };
    constexpr auto panel_count = sizeof(panels) / sizeof(panels[0]);

    fn_ui::app_menubar(panels, panel_count, nullptr);

    draw_launcher();
    draw_jobs();
    draw_datasets();
}
// Entry point. Resolves the registry.db path (first CLI argument, then
// FN_REGISTRY_DB, then FN_REGISTRY_ROOT + "/registry.db"), opens it and
// preloads the recent-functions list, then runs the UI loop. A missing or
// unopenable database is reported on stderr but does not stop the UI.
int main(int argc, char** argv) {
    // Value of an environment variable, or "" when unset or empty.
    const auto env_or_empty = [](const char* name) -> std::string {
        const char* value = std::getenv(name);
        if (value == nullptr || *value == '\0') {
            return std::string();
        }
        return std::string(value);
    };

    // CLI: optional first argument overrides the registry.db path.
    if (argc >= 2) {
        g_db_path = argv[1];
    }
    if (g_db_path.empty()) {
        g_db_path = env_or_empty("FN_REGISTRY_DB");
    }
    if (g_db_path.empty()) {
        const std::string root = env_or_empty("FN_REGISTRY_ROOT");
        if (!root.empty()) {
            g_db_path = root + "/registry.db";
        }
    }

    if (g_db_path.empty()) {
        std::fprintf(stderr,
                     "[odr_console] No registry.db path. Pass as arg or set "
                     "FN_REGISTRY_DB / FN_REGISTRY_ROOT.\n");
    } else if (registry_open(g_registry, g_db_path)) {
        registry_list_recent(g_registry, 50, g_results);
    } else {
        std::fprintf(stderr,
                     "[odr_console] Failed to open registry.db: %s\n",
                     g_db_path.c_str());
    }

    fn::AppConfig cfg;
    cfg.title = "odr_console — online data recopilation";
    cfg.width = 1400;
    cfg.height = 900;
    cfg.about = { "odr_console", "0.1.0",
                  "Lanzador GUI de funciones del registry para recolectar "
                  "datos online (APIs, scraping, browser CDP). MVP." };
    cfg.log = { "odr_console.log", 1 };

    const int exit_code = fn::run_app(cfg, render);
    registry_close(g_registry);
    return exit_code;
}
BIN
View File
Binary file not shown.