d6d7b03d09
- app.md - appicon.ico - extract_panel.cpp - main.cpp - views.cpp Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
169 lines
6.6 KiB
Markdown
169 lines
6.6 KiB
Markdown
---
|
|
name: graph_explorer
|
|
lang: cpp
|
|
domain: viz
|
|
version: 0.1.0
|
|
description: "Visor de grafos GPU-accelerated agnostico del backend. Lee operations.db de cualquier app del registry y permite explorar entidades/relaciones con shapes/iconos/layouts/filtros."
|
|
tags: [imgui, graph, osint, visualization, gpu]
|
|
uses_functions:
|
|
# viz
|
|
- graph_renderer_cpp_viz
|
|
- graph_force_layout_cpp_viz
|
|
- graph_force_layout_gpu_cpp_viz
|
|
- graph_layouts_cpp_viz
|
|
- graph_viewport_cpp_viz
|
|
- graph_viewport_selection_cpp_viz
|
|
- graph_labels_cpp_viz
|
|
- graph_labels_select_cpp_viz
|
|
- graph_icons_cpp_viz
|
|
- graph_sources_cpp_viz
|
|
- graph_types_cpp_viz
|
|
# core
|
|
- graph_spatial_hash_cpp_core
|
|
- button_cpp_core
|
|
- icon_button_cpp_core
|
|
- toolbar_cpp_core
|
|
- modal_dialog_cpp_core
|
|
- text_input_cpp_core
|
|
- select_cpp_core
|
|
- tree_view_cpp_core
|
|
- page_header_cpp_core
|
|
- fullscreen_window_cpp_core
|
|
- badge_cpp_core
|
|
- empty_state_cpp_core
|
|
# paste & extract panel (issue 0013) — invoca enrichers/paste_extract/run.py
|
|
# via subprocess directo (no via jobs); usa extract_iocs + opcional hybrid.
|
|
- extract_iocs_py_cybersecurity
|
|
- extract_graph_hybrid_py_pipelines
|
|
uses_types: []
|
|
uses_modules: [data_table_cpp]
|
|
framework: "imgui"
|
|
entry_point: "main.cpp"
|
|
dir_path: "projects/osint_graph/apps/graph_explorer"
|
|
repo_url: "https://gitea-dgg044oo04woo4ggcsws4gk0.organic-machine.com/dataforge/graph_explorer"
|
|
icon:
|
|
phosphor: "graph"
|
|
accent: "#0891b2"
|
|
python_runtime: true
|
|
python_runtime_deps:
|
|
- requests
|
|
- certifi
|
|
- urllib3
|
|
- cryptography
|
|
|
|
# Validacion end-to-end (fase 4 del bucle reactivo). Ver issue 0068.
|
|
# C++ ImGui app: build con cmake, smoke via --self-test, tests pytest WSL.
|
|
e2e_checks:
|
|
- id: build
|
|
cmd: "cmake --build build --target graph_explorer -j"
|
|
timeout_s: 300
|
|
expect_exit: 0
|
|
- id: tests_pytest_wsl
|
|
cmd: "cd tests && python3 -m pytest -x -q"
|
|
timeout_s: 180
|
|
expect_exit: 0
|
|
- id: smoke_self_test
|
|
cmd: "./build/graph_explorer --self-test"
|
|
timeout_s: 30
|
|
expect_exit: 0
|
|
- id: enricher_fetch_webpage
|
|
cmd: "./build/graph_explorer --run-enricher fetch_webpage --target https://example.com --json"
|
|
timeout_s: 60
|
|
expect_stdout_contains: "\"status\":\"done\""
|
|
- id: ops_audit
|
|
ref: "fn-recopilador:projects/osint_graph/apps/graph_explorer"
|
|
---
|
|
|
|
## Arquitectura
|
|
|
|
App C++ ImGui para explorar cualquier `operations.db` del registry como un grafo
|
|
de entidades y relaciones. Agnostica del backend — el dispatcher en
|
|
`data.{h,cpp}` selecciona el `GraphLoadFn` segun `--input` (hoy solo
|
|
`operations`, manana `json`/`jsonl`/`graphml`).
|
|
|
|
**Capas:**
|
|
|
|
- `data.{h,cpp}` — dispatcher de sources. Hoy unica implementacion:
|
|
`graph_load_from_operations` (issue 0049g).
|
|
- `types_registry.{h,cpp}` — parser minimo de YAML para sobrescribir
|
|
`color`/`shape`/`icon`/`style` por nombre de tipo. Construye el `IconAtlas`
|
|
con los codepoints Tabler resueltos por `tabler_codepoint_by_name`.
|
|
- `views.{h,cpp}` — paneles `Toolbar`, `Legend`, `Inspector`, `Stats`. Toggle
|
|
via `AppConfig::panels`.
|
|
- `main.cpp` — CLI + `fn::run_app` + bucle de force layout (CPU/GPU) + glue.
|
|
- `graph_explorer.db` — SQLite junto al exe. Tabla `layouts(graph_hash,
|
|
node_id, x, y, pinned, updated_at)`. Persistencia de posiciones por grafo.
|
|
|
|
## CLI
|
|
|
|
```bash
|
|
graph_explorer [<operations.db>]
|
|
graph_explorer --input operations <path>
|
|
graph_explorer --types <yaml>
|
|
graph_explorer --layout force|grid|circular|radial|hierarchical|fixed
|
|
graph_explorer apps/registry_dashboard/operations.db
|
|
graph_explorer --types projects/osint_graph/apps/graph_explorer/examples/types.yaml \
|
|
apps/element_agents/operations.db
|
|
```
|
|
|
|
## Build
|
|
|
|
```bash
|
|
cd cpp
|
|
cmake -B build/linux -S .
|
|
cmake --build build/linux --target graph_explorer -j$(nproc)
|
|
./build/linux/apps/graph_explorer/graph_explorer apps/registry_dashboard/operations.db
|
|
```
|
|
|
|
## Notas
|
|
|
|
- Usa GPU layout si el contexto OpenGL soporta compute shaders (4.3+); toggle CPU/GPU desde la
|
|
toolbar. Fallback transparente a CPU si GPU no esta disponible.
|
|
- 50k nodos a 60fps con layout GPU (medido en demos/graph en
|
|
`primitives_gallery`).
|
|
- `operations.db` se abre con `mode=ro` cuando el path no apunta al
|
|
filesystem propio, para evitar locks con otras apps que esten escribiendo.
|
|
- El `graph_hash` se calcula a partir del path canonico del input. Mismo path
|
|
= mismo grafo a efectos de layout guardado.
|
|
|
|
### Pipeline NER+RE disponible en el registry (2026-05-04)
|
|
|
|
Tras la investigacion del analysis `gliner_glirel_tuning` (proyecto `osint_graph`), el stack completo de extraccion de entidades + relaciones desde texto / PDF esta listo como funciones del registry. **Esto desbloquea los issues 0041 y 0042.** Ejemplo de uso:
|
|
|
|
```python
|
|
# Pipeline E2E recomendado (texto -> grafo)
|
|
from pipelines.extract_graph_from_text import extract_graph_from_text
|
|
from datascience.gliner2_load_model import gliner2_load_model
|
|
|
|
model = gliner2_load_model() # Apache 2.0, NER+RE joint, 340M params
|
|
result = extract_graph_from_text(text, ENTITY_LABELS, RELATION_LABELS, ALLOWED, model)
|
|
# result = {'nodes': [...], 'edges': [...], 'stats': {...}}
|
|
```
|
|
|
|
Componentes (mira `python/functions/{core,datascience,pipelines}/`):
|
|
- **core (puras):** `clean_pdf_text`, `chunk_with_overlap`, `merge_entity_aliases`, `filter_relations_by_entity_types`, `aggregate_extraction_results`.
|
|
- **datascience (impuras):** `gliner2_load_model`, `extract_graph_gliner2`, `spacy_es_load_model`, `extract_triples_spacy_es` (OpenIE schema-less ES).
|
|
- **pipelines:** `extract_graph_from_text` — composicion E2E.
|
|
|
|
Recetas validadas en los notebooks 04-08 del analysis y guardadas en `vaults/osint_nlp_models/`:
|
|
- `threshold=0.3` (vs default 0.5) para GLiNER2.
|
|
- snake_case verbal labels (`works_at`, `ceo_of`...).
|
|
- `chunk_with_overlap` para texto > 1500 chars.
|
|
- `filter_relations_by_entity_types` para descartar `Madrid president_of Persona`.
|
|
- `merge_entity_aliases` para fusionar `BBVA` ⊂ `Banco Bilbao Vizcaya Argentaria, S.A.`.
|
|
- spaCy ES dep-rules como capa OpenIE schema-less complementaria (predicado = verbo del texto).
|
|
|
|
Issues que desbloquea: `issues/0041-split-confidence-thresholds.md` y `issues/0042-gliner2-unified-extractor.md`. El registry tiene todas las funciones necesarias; solo falta cablearlas en `extract_graph_hybrid_py_pipelines` y en el panel `paste_extract`.
|
|
|
|
Playground de referencia: `projects/osint_graph/analysis/gliner_glirel_tuning/playground/` (FastAPI + Sigma.js, sirviendo en `localhost:7878`).
|
|
|
|
|
|
## Capability growth log
|
|
|
|
Una linea por bump SemVer. Bump-type segun `.claude/commands/version.md`:
|
|
- `major`: breaking observable (CLI args, schema BBDD propia, formato wire).
|
|
- `minor`: feature aditiva (nuevo panel, endpoint, opcion).
|
|
- `patch`: bugfix sin cambio observable.
|
|
|
|
- v0.1.0 (2026-05-18) — baseline.
|