#include "data.h"
#include "../../../../cpp/vendor/sqlite3/sqlite3.h"

// Standard headers for the library names used in this file.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

namespace ge {

// FNV-1a style 64-bit hash of a NUL-terminated string. It must match the
// implementations in graph_sources.cpp and entity_ops.cpp so that the
// `user_data` values computed in this file agree with the loader's.
// A null pointer hashes like the empty string (the seed is returned as is).
// NOTE(review): the seed is not the canonical FNV-1a offset basis
// (14695981039346656037). Do not change it here unless the other two files
// are changed in lockstep, or node lookups will stop matching.
static uint64_t gf_fnv1a64(const char* s) {
    uint64_t h = 1469598103934665603ULL;
    if (s == nullptr) return h;
    for (const char* p = s; *p != '\0'; ++p) {
        h ^= static_cast<uint8_t>(*p);
        h *= 1099511628211ULL;
    }
    return h;
}

// Loads a graph from the input described by `args` into `out`.
//
// `stats` is reset first and receives the error count/message on failure.
// Returns false when `out` or `stats` is null, when `args.uri` is null or
// empty, or when `args.kind` is not a supported input kind; otherwise returns
// whatever the kind-specific loader returns.
bool load_graph(const InputArgs& args, GraphData* out, graph::GraphLoadStats* stats) {
    if (!out || !stats) return false;
    *stats = graph::GraphLoadStats{};

    if (!args.uri || !*args.uri) {
        stats->errors = 1;
        std::snprintf(stats->error_msg, sizeof(stats->error_msg), "no input uri");
        return false;
    }

    switch (args.kind) {
        case INPUT_OPERATIONS:
            return graph::graph_load_from_operations(args.uri, out, stats);
        case INPUT_NONE:
        default:
            stats->errors = 1;
            std::snprintf(stats->error_msg, sizeof(stats->error_msg),
                          "unsupported input kind");
            return false;
    }
}

// Frees the current contents of `out` (when non-null), loads the graph again
// and, if `group_expanded` is provided, applies the group filter on top.
//
// Returns the result of the load. The filter step is best effort: if it fails
// the caller still gets a valid, unfiltered graph, so its result is ignored.
bool reload_graph(const InputArgs& args, GraphData* out, graph::GraphLoadStats* stats,
                  const std::unordered_map<std::string, bool>* group_expanded) {
    if (out) graph::graph_free(out);

    if (!load_graph(args, out, stats)) return false;

    if (group_expanded && args.uri && *args.uri) {
        static_cast<void>(apply_group_filter(out, args.uri, *group_expanded));
    }
    return true;
}

// ----------------------------------------------------------------------------
// apply_group_filter (issue 0035b)
// ----------------------------------------------------------------------------

namespace {

// Reports whether the `group_id` column exists in `entities`. Without the
// column the filter has nothing to do and leaves the graph untouched. The
// migration (issue 0035a) adds it to new and existing databases, but if an old
// database is opened before the migration has run we must not blow up.
bool has_group_id_column(sqlite3* db) { sqlite3_stmt* st = nullptr; if (sqlite3_prepare_v2(db, "PRAGMA table_info(entities)", -1, &st, nullptr) != SQLITE_OK) return false; bool found = false; while (sqlite3_step(st) == SQLITE_ROW) { const unsigned char* name = sqlite3_column_text(st, 1); if (name && std::strcmp((const char*)name, "group_id") == 0) { found = true; break; } } sqlite3_finalize(st); return found; } // Detecta el nombre de la columna de tipo en entities (puede ser type_ref o // type segun la version del schema). std::string entity_type_column(sqlite3* db) { sqlite3_stmt* st = nullptr; if (sqlite3_prepare_v2(db, "PRAGMA table_info(entities)", -1, &st, nullptr) != SQLITE_OK) return "type_ref"; std::string col = "type_ref"; bool seen_type_ref = false; bool seen_type = false; while (sqlite3_step(st) == SQLITE_ROW) { const unsigned char* name = sqlite3_column_text(st, 1); if (!name) continue; if (std::strcmp((const char*)name, "type_ref") == 0) seen_type_ref = true; if (std::strcmp((const char*)name, "type") == 0) seen_type = true; } sqlite3_finalize(st); if (seen_type_ref) col = "type_ref"; else if (seen_type) col = "type"; return col; } } // anon bool apply_group_filter(GraphData* g, const char* db_path, const std::unordered_map& group_expanded) { if (!g || !db_path || !*db_path) return false; if (g->node_count <= 0) return true; sqlite3* db = nullptr; if (sqlite3_open(db_path, &db) != SQLITE_OK) { if (db) sqlite3_close(db); return false; } if (!has_group_id_column(db)) { sqlite3_close(db); return true; // schema antiguo: nada que filtrar } std::string type_col = entity_type_column(db); // Lee (id, group_id, type) para todas las entidades. 
Construimos: // - hash_to_entity_id: user_data (FNV1a64 del id) → string id // - entity_to_group_id: id string → group_id string ("" si NULL) // - is_group_type: id string → true si type == "Group" std::unordered_map hash_to_entity_id; std::unordered_map entity_to_group_id; std::unordered_set group_entities; { std::string q = "SELECT id, group_id, " + type_col + " FROM entities"; sqlite3_stmt* st = nullptr; if (sqlite3_prepare_v2(db, q.c_str(), -1, &st, nullptr) != SQLITE_OK) { sqlite3_close(db); return false; } while (sqlite3_step(st) == SQLITE_ROW) { const unsigned char* id_c = sqlite3_column_text(st, 0); const unsigned char* gid_c = sqlite3_column_text(st, 1); const unsigned char* tp_c = sqlite3_column_text(st, 2); if (!id_c) continue; std::string id_s = (const char*)id_c; std::string gid_s = gid_c ? (const char*)gid_c : ""; std::string type_s = tp_c ? (const char*)tp_c : ""; hash_to_entity_id[gf_fnv1a64(id_s.c_str())] = id_s; if (!gid_s.empty()) entity_to_group_id[id_s] = gid_s; if (type_s == "Group") group_entities.insert(id_s); } sqlite3_finalize(st); } sqlite3_close(db); // Helper: devuelve true si el entity_id `eid` es un grupo expandido. auto is_expanded = [&](const std::string& eid) -> bool { auto it = group_expanded.find(eid); return it != group_expanded.end() && it->second; }; // Para cada nodo del grafo, decidir: // - hidden: se elimina del array final // - effective_node_idx: indice del nodo en el array nuevo, o -1 si oculto // - redirect_to_group_idx: si oculto y su grupo existe en el grafo y NO // esta expandido, las aristas se redirigen al indice del grupo en el // array nuevo. Si el grupo no existe en el grafo (orfano), se deja -1 // y las aristas se descartan. // Construimos primero un map hash→old_idx y entity_id→old_idx para // resolver rapido los grupos. 
std::unordered_map entity_id_to_old_idx; entity_id_to_old_idx.reserve((size_t)g->node_count); for (int i = 0; i < g->node_count; ++i) { auto it = hash_to_entity_id.find(g->nodes[i].user_data); if (it != hash_to_entity_id.end()) { entity_id_to_old_idx[it->second] = i; } } // Decidir visibility por nodo. std::vector hidden((size_t)g->node_count, 0); std::vector redirect_old(g->node_count, -1); // old_idx → old_idx del grupo (si oculto) for (int i = 0; i < g->node_count; ++i) { auto hit = hash_to_entity_id.find(g->nodes[i].user_data); if (hit == hash_to_entity_id.end()) continue; const std::string& eid = hit->second; auto git = entity_to_group_id.find(eid); if (git == entity_to_group_id.end()) continue; const std::string& parent = git->second; if (is_expanded(parent)) continue; // grupo expandido → visible // Nodo oculto. Buscar el indice del grupo padre en el grafo. auto pit = entity_id_to_old_idx.find(parent); if (pit != entity_id_to_old_idx.end()) { redirect_old[i] = pit->second; hidden[i] = 1; } else { // Grupo padre no presente en el grafo cargado — ocultamos sin // redirigir. Las aristas se descartan. hidden[i] = 1; } } // Compactar nodos: nuevo array sin los ocultos. Mantenemos un map old→new. std::vector old_to_new((size_t)g->node_count, -1); int new_count = 0; for (int i = 0; i < g->node_count; ++i) { if (hidden[i]) continue; old_to_new[i] = new_count; if (new_count != i) g->nodes[new_count] = g->nodes[i]; ++new_count; } int original_node_count = g->node_count; g->node_count = new_count; // Helper: dado old_idx, devuelve el new_idx efectivo (redirige al grupo // si oculto). Devuelve -1 si imposible (oculto sin grupo en el grafo). auto effective_new_idx = [&](int old_idx) -> int { if (old_idx < 0 || old_idx >= original_node_count) return -1; if (!hidden[old_idx]) return old_to_new[old_idx]; int g_old = redirect_old[old_idx]; if (g_old < 0) return -1; return old_to_new[g_old]; // el grupo es siempre visible }; // Filtrar/redirigir aristas. 
// Para deduplicacion grupo-a-grupo: clave = (min(grp_old_a, grp_old_b), // max(...), rel_type_id). Asi mantenemos UNA arista por par + tipo. std::unordered_set seen_grp_pairs; auto pair_key = [](int a, int b, uint16_t rt) -> uint64_t { if (a > b) std::swap(a, b); // a, b en uint32 + rt en 16 bits. Combinamos. uint64_t k = (uint64_t)(uint32_t)a; k = (k << 24) ^ (uint64_t)(uint32_t)b; k = (k << 16) ^ (uint64_t)rt; return k; }; int new_edge_count = 0; for (int e = 0; e < g->edge_count; ++e) { GraphEdge& edge = g->edges[e]; int s_old = (int)edge.source; int t_old = (int)edge.target; if (s_old < 0 || s_old >= original_node_count || t_old < 0 || t_old >= original_node_count) continue; bool s_hidden = hidden[s_old] != 0; bool t_hidden = hidden[t_old] != 0; int s_new = effective_new_idx(s_old); int t_new = effective_new_idx(t_old); if (s_new < 0 || t_new < 0) continue; // descarte por orfandad // Caso 1: ambos extremos eran hijos de grupo(s) colapsado(s). if (s_hidden && t_hidden) { // Si caen en el MISMO grupo → arista interna, se descarta // (issue 0035 decision 5). int s_grp_old = redirect_old[s_old]; int t_grp_old = redirect_old[t_old]; if (s_grp_old < 0 || t_grp_old < 0) continue; if (s_grp_old == t_grp_old) continue; // Distintos grupos → dedup por par + rel_type. uint64_t k = pair_key(s_grp_old, t_grp_old, edge.type_id); if (seen_grp_pairs.count(k)) continue; seen_grp_pairs.insert(k); edge.source = (uint32_t)s_new; edge.target = (uint32_t)t_new; } else if (s_hidden || t_hidden) { // Cross-edge: un extremo dentro de grupo colapsado, otro fuera. // Redirigimos el extremo oculto al grupo. Sin dedup (issue 0035 // decision 5: una linea por arista cuando es single-cross). edge.source = (uint32_t)s_new; edge.target = (uint32_t)t_new; } else { // Ambos visibles — arista normal. 
edge.source = (uint32_t)s_new; edge.target = (uint32_t)t_new; } if (new_edge_count != e) g->edges[new_edge_count] = edge; ++new_edge_count; } g->edge_count = new_edge_count; g->update_bounds(); return true; } } // namespace ge