Files
fn_registry/cpp/tests/test_graph_sources.cpp
egutierrez 54cee13e8e feat(viz): graph_sources lector operations.db + streaming (issue 0049g)
- graph_load_from_operations: SQLite read-only, schema-detect (type_ref/type,
  from_entity/source, to_entity/target, name/type, weight, updated_at).
- 16-color indigo palette por hash FNV1a32 del nombre de tipo. user_data
  por nodo es FNV1a64(entity.id) — deterministico entre cargas.
- Label pool interno: metadata.name (JSON simple) > entities.name > id.
- graph_free libera nodes/edges/types/rel_types/labels/strdup'd names via
  arena_map (GraphData* -> arena).
- Streaming pull-based con tiebreak (updated_at, id) y crecimiento x2 de
  capacidad. Tipos nuevos descubiertos en stream se anaden a types.
- Tests: fixture in-memory (3 entity types, 2 rel types, 10 entities,
  15 relations) + smoke contra apps/script_navegador/operations.db.
- Issue movido a completed/.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 23:12:31 +02:00

266 lines
9.3 KiB
C++

// Unit tests para graph_sources (issue 0049g).
// Genera el fixture operations.db en runtime sobre un fichero temporal
// (no se versiona binario). Cubre carga sincrona, conteos, determinismo
// del user_data, resolucion de aristas, y streaming pull-based.
#define CATCH_CONFIG_MAIN
#include "catch_amalgamated.hpp"
#include "viz/graph_sources.h"
#include "viz/graph_types.h"
#include "../vendor/sqlite3/sqlite3.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <unordered_set>
// ---------------------------------------------------------------------------
// Fixture: 3 entity types (Person/Email/Domain), 2 relation types
// (owns/connects), 10 entities, 15 relations. Schema = el del registry
// (type_ref, from_entity, to_entity, weight, name, updated_at).
// ---------------------------------------------------------------------------
// DDL executed once per fixture database. It is a single string holding two
// ';'-terminated statements, so it can be handed to sqlite3_exec in one call.
// Adjacent literals are concatenated by the compiler; every continuation
// fragment starts with a space so the column definitions stay separated.
static const char* kSchemaSQL =
    // entities: id / name / type_ref plus a JSON metadata blob and timestamps.
    "CREATE TABLE entities ( id TEXT PRIMARY KEY,"
    " name TEXT NOT NULL DEFAULT '', type_ref TEXT NOT NULL,"
    " status TEXT NOT NULL DEFAULT 'active', metadata TEXT NOT NULL DEFAULT '{}',"
    " created_at TEXT NOT NULL DEFAULT '2026-01-01T00:00:00Z',"
    " updated_at TEXT NOT NULL DEFAULT '2026-01-01T00:00:00Z');"
    // relations: named, optionally weighted link between two entity ids.
    "CREATE TABLE relations ( id TEXT PRIMARY KEY, name TEXT NOT NULL,"
    " from_entity TEXT NOT NULL, to_entity TEXT NOT NULL, weight REAL,"
    " created_at TEXT NOT NULL DEFAULT '2026-01-01T00:00:00Z',"
    " updated_at TEXT NOT NULL DEFAULT '2026-01-01T00:00:00Z');";
// Runs `sql` on `db` through sqlite3_exec with no row callback.
// On any result other than SQLITE_OK it prints the SQLite error text (or "?"
// when none was provided) to stderr and aborts the process, so fixture setup
// problems are never mistaken for failures of the code under test.
static void exec_or_die(sqlite3* db, const char* sql) {
    char* error_text = nullptr;
    if (sqlite3_exec(db, sql, nullptr, nullptr, &error_text) == SQLITE_OK) {
        return;
    }

    const char* shown = (error_text != nullptr) ? error_text : "?";
    std::fprintf(stderr, "sql failed: %s\n", shown);
    // The message buffer is owned by SQLite and must be released with sqlite3_free.
    sqlite3_free(error_text);
    std::abort();
}
// Creates a fresh on-disk SQLite database populated with the test fixture and
// returns its path. The caller is responsible for removing the file.
//
// suffix: inserted between the generated temporary name and the ".db"
//         extension so each test case can use a distinguishable file.
//
// Contents: the kSchemaSQL tables, 10 entities (4 Person, 4 Email, 2 Domain)
// and 15 relations (owns / connects), each relation with weight 1.0.
// Must be called from inside a Catch2 test case because it uses REQUIRE.
static std::string make_fixture(const char* suffix = "") {
    char buf[L_tmpnam];
    // std::tmpnam returns nullptr when no name can be generated; in that case
    // buf is left uninitialized and must not be read.
    REQUIRE(std::tmpnam(buf) != nullptr);
    std::string path = std::string(buf) + suffix + ".db";
    std::remove(path.c_str());

    sqlite3* db = nullptr;
    const int open_rc = sqlite3_open(path.c_str(), &db);
    if (open_rc != SQLITE_OK) {
        // sqlite3_open usually hands back a handle even on failure; release it
        // before REQUIRE aborts the test. sqlite3_close(nullptr) is a no-op.
        sqlite3_close(db);
    }
    REQUIRE(open_rc == SQLITE_OK);
    exec_or_die(db, kSchemaSQL);

    // 10 entities: 4 Person, 4 Email, 2 Domain. Columns: {id, type_ref, name}.
    const char* entities[10][3] = {
        {"p1", "Person", "Alice"},
        {"p2", "Person", "Bob"},
        {"p3", "Person", "Carol"},
        {"p4", "Person", "Dave"},
        {"e1", "Email", "alice@a.com"},
        {"e2", "Email", "bob@b.com"},
        {"e3", "Email", "carol@c.com"},
        {"e4", "Email", "dave@d.com"},
        {"d1", "Domain", "a.com"},
        {"d2", "Domain", "b.com"},
    };
    for (const auto& e : entities) {
        char sql[512];
        // The name is written twice: into the name column and into metadata.name.
        std::snprintf(sql, sizeof(sql),
            "INSERT INTO entities (id, name, type_ref, metadata) VALUES "
            "('%s','%s','%s','{\"name\":\"%s\"}');", e[0], e[2], e[1], e[2]);
        exec_or_die(db, sql);
    }

    // 15 relations. Columns: {id, name, from_entity, to_entity}.
    const char* rels[15][4] = {
        {"r1", "owns", "p1", "e1"},
        {"r2", "owns", "p2", "e2"},
        {"r3", "owns", "p3", "e3"},
        {"r4", "owns", "p4", "e4"},
        {"r5", "owns", "e1", "d1"},
        {"r6", "owns", "e2", "d2"},
        {"r7", "connects", "p1", "p2"},
        {"r8", "connects", "p2", "p3"},
        {"r9", "connects", "p3", "p4"},
        {"r10", "connects", "p4", "p1"},
        {"r11", "connects", "e1", "e2"},
        {"r12", "connects", "e3", "e4"},
        {"r13", "connects", "d1", "d2"},
        {"r14", "owns", "p1", "e3"},
        {"r15", "owns", "p2", "e4"},
    };
    for (const auto& r : rels) {
        char sql[512];
        std::snprintf(sql, sizeof(sql),
            "INSERT INTO relations (id, name, from_entity, to_entity, weight) VALUES "
            "('%s','%s','%s','%s', 1.0);", r[0], r[1], r[2], r[3]);
        exec_or_die(db, sql);
    }

    sqlite3_close(db);
    return path;
}
// ---------------------------------------------------------------------------
// Fase 3.1 / 3.2 — carga sincrona
// ---------------------------------------------------------------------------
TEST_CASE("graph_load_from_operations: conteos y tipos", "[graph_sources]") {
    // Synchronous load of the standard fixture: the stats and the GraphData
    // counters must both report 10 nodes, 15 edges, 3 entity types and
    // 2 relation types.
    const std::string db_path = make_fixture();

    GraphData graph_data{};
    graph::GraphLoadStats stats{};
    REQUIRE(graph::graph_load_from_operations(db_path.c_str(), &graph_data, &stats) == true);

    // Counters reported through the stats struct.
    CHECK(stats.errors == 0);
    CHECK(stats.nodes_loaded == 10);
    CHECK(stats.edges_loaded == 15);
    CHECK(stats.types_discovered == 3);
    CHECK(stats.rel_types_discovered == 2);

    // Counters stored in the graph itself.
    CHECK(graph_data.node_count == 10);
    CHECK(graph_data.edge_count == 15);
    CHECK(graph_data.type_count == 3);
    CHECK(graph_data.rel_type_count == 2);

    // Every node refers to a valid type index.
    for (int n = 0; n < graph_data.node_count; ++n) {
        const auto& node = graph_data.nodes[n];
        CHECK(node.type_id < graph_data.type_count);
    }

    // Every edge resolves to valid node indices and a valid relation type.
    for (int e = 0; e < graph_data.edge_count; ++e) {
        const auto& edge = graph_data.edges[e];
        CHECK(edge.source < static_cast<uint32_t>(graph_data.node_count));
        CHECK(edge.target < static_cast<uint32_t>(graph_data.node_count));
        CHECK(edge.type_id < graph_data.rel_type_count);
    }

    // graph_free must leave the struct reset.
    graph::graph_free(&graph_data);
    CHECK(graph_data.nodes == nullptr);
    CHECK(graph_data.node_count == 0);

    std::remove(db_path.c_str());
}
TEST_CASE("graph_load_from_operations: user_data deterministico", "[graph_sources]") {
    // Loads the same fixture twice and verifies that each node's user_data is
    // non-zero, unique within one load, and identical slot-by-slot across loads.
    std::string path = make_fixture("_a");

    GraphData g1{}; graph::GraphLoadStats s1{};
    REQUIRE(graph::graph_load_from_operations(path.c_str(), &g1, &s1));

    // user_data values are unique and non-zero within a single load.
    std::unordered_set<uint64_t> seen;
    for (int i = 0; i < g1.node_count; ++i) {
        CHECK(g1.nodes[i].user_data != 0);
        CHECK(seen.insert(g1.nodes[i].user_data).second);
    }

    GraphData g2{}; graph::GraphLoadStats s2{};
    REQUIRE(graph::graph_load_from_operations(path.c_str(), &g2, &s2));

    // Both loads must yield the same number of nodes. Report a mismatch as a
    // failure, and only compare the slots that exist in both graphs so a
    // shorter second load can never cause an out-of-bounds read of g2.nodes.
    CHECK(g2.node_count == g1.node_count);
    const int comparable = (g2.node_count < g1.node_count) ? g2.node_count
                                                           : g1.node_count;

    // Same insertion order -> same user_data in every slot.
    for (int i = 0; i < comparable; ++i) {
        CHECK(g1.nodes[i].user_data == g2.nodes[i].user_data);
    }

    graph::graph_free(&g1);
    graph::graph_free(&g2);
    std::remove(path.c_str());
}
TEST_CASE("graph_load_from_operations: error si BD no existe", "[graph_sources]") {
    // Loading from a path that does not exist must fail, count at least one
    // error and leave a non-empty message in the stats.
    GraphData graph_data{};
    graph::GraphLoadStats stats{};

    const bool loaded = graph::graph_load_from_operations(
        "/nonexistent/path/xyz.db", &graph_data, &stats);

    CHECK(loaded == false);
    CHECK(stats.errors >= 1);
    CHECK(std::strlen(stats.error_msg) > 0);
}
TEST_CASE("graph_load_from_operations: error si falta tabla entities", "[graph_sources]") {
    // Opens and immediately closes a brand-new database so that no schema is
    // ever created in it, then verifies that the loader reports a failure.
    char buf[L_tmpnam];
    // std::tmpnam returns nullptr on failure and leaves buf uninitialized.
    REQUIRE(std::tmpnam(buf) != nullptr);
    std::string path = std::string(buf) + "_empty.db";
    std::remove(path.c_str());

    sqlite3* db = nullptr;
    const int open_rc = sqlite3_open(path.c_str(), &db);
    // Close unconditionally before asserting: sqlite3_open usually returns a
    // handle even when it fails, and sqlite3_close(nullptr) is a no-op.
    sqlite3_close(db);
    REQUIRE(open_rc == SQLITE_OK);

    GraphData g{}; graph::GraphLoadStats s{};
    bool ok = graph::graph_load_from_operations(path.c_str(), &g, &s);
    CHECK(ok == false);
    CHECK(s.errors >= 1);

    std::remove(path.c_str());
}
TEST_CASE("graph_label devuelve nombre desde metadata.name", "[graph_sources]") {
    // The fixture stores each entity's name in metadata.name as well as in the
    // name column; the label of the first loaded node must be "Alice".
    std::string path = make_fixture("_lab");

    GraphData g{}; graph::GraphLoadStats s{};
    REQUIRE(graph::graph_load_from_operations(path.c_str(), &g, &s));
    REQUIRE(g.node_count > 0);

    const char* label = graph::graph_label(&g, g.nodes[0].label_idx);
    // Report a null label as a test failure instead of crashing inside
    // std::strlen / std::string, and still run the cleanup below.
    CHECK(label != nullptr);
    if (label != nullptr) {
        CHECK(std::strlen(label) > 0);
        // The first inserted entity has metadata.name = "Alice".
        CHECK(std::string(label) == "Alice");
    }

    graph::graph_free(&g);
    std::remove(path.c_str());
}
// ---------------------------------------------------------------------------
// Fase 3.3 — streaming
// ---------------------------------------------------------------------------
TEST_CASE("graph_stream: detecta filas nuevas", "[graph_sources][stream]") {
    // Scenario: load the fixture, open a stream on the same file, then check
    // three pulls in order: nothing new, newly inserted rows, nothing new again.
    const std::string db_path = make_fixture("_stream");

    GraphData graph_data{};
    graph::GraphLoadStats load_stats{};
    REQUIRE(graph::graph_load_from_operations(db_path.c_str(), &graph_data, &load_stats));
    const int nodes_before = graph_data.node_count;
    const int edges_before = graph_data.edge_count;

    auto* stream = graph::graph_stream_operations_open(db_path.c_str(), 100);
    REQUIRE(stream != nullptr);

    // 1) Database unchanged since the load: the first pull adds nothing.
    int pulled = graph::graph_stream_pull(stream, &graph_data);
    INFO("first pull n=" << pulled << " node_count=" << graph_data.node_count
         << " (initial=" << nodes_before << ") edge_count=" << graph_data.edge_count
         << " (initial=" << edges_before << ")");
    CHECK(pulled == 0);
    CHECK(graph_data.node_count == nodes_before);
    CHECK(graph_data.edge_count == edges_before);

    // 2) Insert two entities and one relation whose updated_at is later than
    //    the fixture default, using a separate connection.
    sqlite3* writer = nullptr;
    REQUIRE(sqlite3_open(db_path.c_str(), &writer) == SQLITE_OK);
    exec_or_die(writer,
        "INSERT INTO entities (id, name, type_ref, updated_at) VALUES "
        "('p99', 'Eve', 'Person', '2027-01-01T00:00:00Z'),"
        "('e99', 'eve@x.com', 'Email', '2027-01-01T00:00:00Z');"
        "INSERT INTO relations (id, name, from_entity, to_entity, weight, updated_at) VALUES "
        "('r99', 'owns', 'p99', 'e99', 1.0, '2027-01-01T00:00:00Z');");
    sqlite3_close(writer);

    pulled = graph::graph_stream_pull(stream, &graph_data);
    CHECK(pulled >= 2); // 2 entities + 1 relation were inserted
    CHECK(graph_data.node_count == nodes_before + 2);
    CHECK(graph_data.edge_count == edges_before + 1);

    // 3) Idempotence: a further pull with no new rows adds nothing.
    pulled = graph::graph_stream_pull(stream, &graph_data);
    CHECK(pulled == 0);

    graph::graph_stream_close(stream);
    graph::graph_free(&graph_data);
    std::remove(db_path.c_str());
}