// graph_explorer/data.cpp

#include "data.h"

#include "../../../../cpp/vendor/sqlite3/sqlite3.h"

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

namespace ge {

// FNV-1a style 64-bit hash of a NUL-terminated string.
//
// Per the original note, the seed and multiplier must stay identical to the
// ones used in graph_sources.cpp and entity_ops.cpp so that the `user_data`
// values computed in this file match the ones produced by the loader.
// A null pointer hashes like the empty string: the seed is returned as-is.
static uint64_t gf_fnv1a64(const char* s) {
    uint64_t hash = 1469598103934665603ULL;
    if (s == nullptr) return hash;
    while (*s != '\0') {
        // Fold the byte in as an unsigned value so high-bit chars do not
        // sign-extend into the upper bits of the hash.
        hash ^= static_cast<uint8_t>(*s);
        hash *= 1099511628211ULL;
        ++s;
    }
    return hash;
}

// Loads a graph from the source described by `args` into `out`.
//
// `stats` is reset to a default-constructed GraphLoadStats before any other
// work. When the request is rejected here (empty uri or unsupported input
// kind) `stats->errors` is set to 1 and `stats->error_msg` receives a short
// reason.
//
// Returns false when `out` or `stats` is null (nothing is written in that
// case), when the request is rejected, or when
// graph::graph_load_from_operations returns false.
bool load_graph(const InputArgs& args, GraphData* out, graph::GraphLoadStats* stats) {
    if (out == nullptr || stats == nullptr) {
        return false;
    }
    *stats = graph::GraphLoadStats{};

    const bool has_uri = args.uri != nullptr && *args.uri != '\0';
    if (!has_uri) {
        stats->errors = 1;
        std::snprintf(stats->error_msg, sizeof(stats->error_msg),
                      "no input uri");
        return false;
    }

    if (args.kind == INPUT_OPERATIONS) {
        return graph::graph_load_from_operations(args.uri, out, stats);
    }

    // INPUT_NONE and every other kind are not loadable.
    stats->errors = 1;
    std::snprintf(stats->error_msg, sizeof(stats->error_msg),
                  "unsupported input kind");
    return false;
}

// Frees whatever `out` currently holds, loads the graph again via load_graph
// and then runs the group post-processing passes against `args.uri`.
//
// `group_expanded` may be null, in which case the group filter is skipped.
// Both post-processing passes are best-effort: their return values are
// ignored, because the caller already has a valid graph at that point.
//
// Returns the result of load_graph.
bool reload_graph(const InputArgs& args, GraphData* out, graph::GraphLoadStats* stats,
                  const std::unordered_map<std::string, bool>* group_expanded) {
    if (out != nullptr) {
        graph::graph_free(out);
    }
    if (!load_graph(args, out, stats)) {
        return false;
    }

    const bool has_uri = args.uri != nullptr && *args.uri != '\0';
    if (!has_uri) {
        return true;
    }

    // Issue 0035e: let each homogeneous Group inherit the icon/colour of its
    // children's type. This runs before the filter so that the reassigned
    // type_id is carried over into the compacted node array.
    apply_group_inherited_visuals(out, args.uri);

    if (group_expanded != nullptr) {
        // If the group_id query fails the graph is simply left unfiltered.
        apply_group_filter(out, args.uri, *group_expanded);
    }
    return true;
}

// ----------------------------------------------------------------------------
// apply_group_filter (issue 0035b)
// ----------------------------------------------------------------------------

namespace {

// Reports whether the `entities` table has a column named `group_id`.
//
// Callers use this to skip group handling on databases that lack the column.
// According to the original note, the issue 0035a migration adds it to new
// and existing databases, but an old database may be opened before the
// migration has run and that must not crash.
//
// Returns false when the PRAGMA statement cannot be prepared.
bool has_group_id_column(sqlite3* db) {
    sqlite3_stmt* stmt = nullptr;
    const int rc = sqlite3_prepare_v2(db, "PRAGMA table_info(entities)", -1, &stmt, nullptr);
    if (rc != SQLITE_OK) {
        return false;
    }

    bool present = false;
    // Stop stepping as soon as the column has been seen.
    while (!present && sqlite3_step(stmt) == SQLITE_ROW) {
        // Result column 1 is compared as the column name.
        const char* column = reinterpret_cast<const char*>(sqlite3_column_text(stmt, 1));
        present = column != nullptr && std::strcmp(column, "group_id") == 0;
    }
    sqlite3_finalize(stmt);
    return present;
}

// Returns the name of the column in `entities` that holds the entity type:
// "type_ref" or "type", depending on the schema version.
//
// "type_ref" wins when both columns exist, and is also the answer when
// neither exists or when the PRAGMA statement cannot be prepared.
std::string entity_type_column(sqlite3* db) {
    sqlite3_stmt* stmt = nullptr;
    if (sqlite3_prepare_v2(db, "PRAGMA table_info(entities)", -1, &stmt, nullptr) != SQLITE_OK) {
        return "type_ref";
    }

    bool has_type_ref = false;
    bool has_type = false;
    while (sqlite3_step(stmt) == SQLITE_ROW) {
        const char* column = reinterpret_cast<const char*>(sqlite3_column_text(stmt, 1));
        if (column == nullptr) {
            continue;
        }
        has_type_ref = has_type_ref || std::strcmp(column, "type_ref") == 0;
        has_type = has_type || std::strcmp(column, "type") == 0;
    }
    sqlite3_finalize(stmt);

    if (!has_type_ref && has_type) {
        return "type";
    }
    return "type_ref";
}

} // anon

// Removes the members of collapsed groups from the graph (issue 0035b).
//
// Reads (id, group_id) for every row of `entities` in the SQLite database at
// `db_path`. A node is matched to an entity through `user_data`, which is
// compared against gf_fnv1a64(entity id). A node whose entity has a non-empty
// group_id is removed unless `group_expanded` maps that group id to true
// (group ids absent from the map count as collapsed). Nodes with no matching
// entity, or whose entity has no group, are always kept.
//
// Nodes and edges are compacted in place and edges are rewritten to the new
// node indices:
//   - a removed endpoint is redirected to the node of its group;
//   - the edge is dropped when a removed endpoint's group has no node in the
//     graph, or when that group node was itself removed;
//   - both endpoints removed, same group: dropped (issue 0035 decision 5);
//   - both endpoints removed, different groups: one edge is kept per
//     (unordered pair of group nodes, edge type_id);
//   - exactly one endpoint removed: kept, without deduplication;
//   - edges whose source/target index is out of range are dropped.
//
// Returns false if `g` is null, `db_path` is null/empty, or the database
// cannot be opened or queried; the graph is left untouched in those cases.
// Returns true otherwise, including the no-op cases (no nodes, or no
// `group_id` column in `entities`).
bool apply_group_filter(GraphData* g, const char* db_path,
                        const std::unordered_map<std::string, bool>& group_expanded) {
    if (!g || !db_path || !*db_path) return false;
    if (g->node_count <= 0) return true;

    sqlite3* db = nullptr;
    if (sqlite3_open(db_path, &db) != SQLITE_OK) {
        if (db) sqlite3_close(db);
        return false;
    }
    if (!has_group_id_column(db)) {
        sqlite3_close(db);
        return true; // old schema: nothing to filter
    }

    // hash_to_entity_id:  user_data (gf_fnv1a64 of the id) -> entity id
    // entity_to_group_id: entity id -> group id (only when non-NULL/non-empty)
    // Only these two columns drive the filter, so the entity type is not read.
    std::unordered_map<uint64_t, std::string> hash_to_entity_id;
    std::unordered_map<std::string, std::string> entity_to_group_id;
    {
        sqlite3_stmt* st = nullptr;
        if (sqlite3_prepare_v2(db, "SELECT id, group_id FROM entities", -1, &st, nullptr) != SQLITE_OK) {
            sqlite3_close(db);
            return false;
        }
        while (sqlite3_step(st) == SQLITE_ROW) {
            const unsigned char* id_c  = sqlite3_column_text(st, 0);
            const unsigned char* gid_c = sqlite3_column_text(st, 1);
            if (!id_c) continue;
            std::string id_s = reinterpret_cast<const char*>(id_c);
            if (gid_c && *gid_c) {
                entity_to_group_id[id_s] = reinterpret_cast<const char*>(gid_c);
            }
            const uint64_t id_hash = gf_fnv1a64(id_s.c_str());
            hash_to_entity_id[id_hash] = std::move(id_s);
        }
        sqlite3_finalize(st);
    }
    sqlite3_close(db);

    const size_t node_total = static_cast<size_t>(g->node_count);

    // Resolve every node to its entity id once. The pointers refer to values
    // owned by hash_to_entity_id, which is not modified past this point.
    // entity_id_to_old_idx lets us find the node of a group by its id.
    std::vector<const std::string*> node_entity(node_total, nullptr);
    std::unordered_map<std::string, int> entity_id_to_old_idx;
    entity_id_to_old_idx.reserve(node_total);
    for (int i = 0; i < g->node_count; ++i) {
        auto it = hash_to_entity_id.find(g->nodes[i].user_data);
        if (it == hash_to_entity_id.end()) continue;
        node_entity[i] = &it->second;
        entity_id_to_old_idx[it->second] = i;
    }

    // Decide visibility per node.
    //   hidden[i]       : 1 when node i is removed from the final array
    //   redirect_old[i] : old index of the group node that stands in for a
    //                     hidden node, or -1 when the group has no node
    std::vector<unsigned char> hidden(node_total, 0);
    std::vector<int> redirect_old(node_total, -1);
    for (int i = 0; i < g->node_count; ++i) {
        if (!node_entity[i]) continue;
        auto git = entity_to_group_id.find(*node_entity[i]);
        if (git == entity_to_group_id.end()) continue;
        const std::string& parent = git->second;

        auto ex = group_expanded.find(parent);
        if (ex != group_expanded.end() && ex->second) continue; // expanded group: stays visible

        hidden[i] = 1;
        auto pit = entity_id_to_old_idx.find(parent);
        if (pit != entity_id_to_old_idx.end()) redirect_old[i] = pit->second;
        // Otherwise the parent group is not in the loaded graph: the node is
        // hidden without a redirect and its edges are dropped below.
    }

    // Compact the node array in place, remembering old index -> new index.
    std::vector<int> old_to_new(node_total, -1);
    int new_count = 0;
    for (int i = 0; i < g->node_count; ++i) {
        if (hidden[i]) continue;
        old_to_new[i] = new_count;
        if (new_count != i) g->nodes[new_count] = g->nodes[i];
        ++new_count;
    }
    const int original_node_count = g->node_count;
    g->node_count = new_count;

    // Maps an old node index to the new index an edge endpoint should use.
    // A hidden node resolves to its group's new index. The result is -1 when
    // the index is out of range, when there is no redirect, or when the group
    // node was itself hidden (its old_to_new entry is -1).
    auto effective_new_idx = [&](int old_idx) -> int {
        if (old_idx < 0 || old_idx >= original_node_count) return -1;
        if (!hidden[old_idx]) return old_to_new[old_idx];
        const int g_old = redirect_old[old_idx];
        if (g_old < 0) return -1;
        return old_to_new[g_old];
    };

    // Group-to-group deduplication. Two 32-bit indices plus a 16-bit type do
    // not fit in a single 64-bit key, so the unordered pair is packed
    // losslessly into 64 bits and the type ids already seen for that pair are
    // kept in a per-pair set.
    std::unordered_map<uint64_t, std::unordered_set<uint16_t>> seen_grp_pairs;
    auto first_time_seen = [&seen_grp_pairs](int a, int b, uint16_t rt) -> bool {
        if (a > b) std::swap(a, b);
        const uint64_t pair_key =
            (static_cast<uint64_t>(static_cast<uint32_t>(a)) << 32) |
            static_cast<uint64_t>(static_cast<uint32_t>(b));
        return seen_grp_pairs[pair_key].insert(rt).second;
    };

    // Filter / redirect edges, compacting the edge array in place.
    int new_edge_count = 0;
    for (int e = 0; e < g->edge_count; ++e) {
        GraphEdge& edge = g->edges[e];
        const int s_old = static_cast<int>(edge.source);
        const int t_old = static_cast<int>(edge.target);
        if (s_old < 0 || s_old >= original_node_count ||
            t_old < 0 || t_old >= original_node_count) continue;

        const int s_new = effective_new_idx(s_old);
        const int t_new = effective_new_idx(t_old);
        if (s_new < 0 || t_new < 0) continue; // endpoint has nowhere to go

        if (hidden[s_old] && hidden[t_old]) {
            // Both endpoints were members of collapsed groups. Both redirects
            // are valid here, otherwise s_new/t_new would have been -1.
            const int s_grp_old = redirect_old[s_old];
            const int t_grp_old = redirect_old[t_old];
            // Same group: internal edge, dropped (issue 0035 decision 5).
            if (s_grp_old == t_grp_old) continue;
            // Different groups: keep one edge per pair + type.
            if (!first_time_seen(s_grp_old, t_grp_old, edge.type_id)) continue;
        }
        // Otherwise: either a cross edge with exactly one hidden endpoint
        // (redirected to the group, one line per edge, no dedup) or a plain
        // edge between two visible nodes. Both only need re-indexing.
        edge.source = static_cast<uint32_t>(s_new);
        edge.target = static_cast<uint32_t>(t_new);

        if (new_edge_count != e) g->edges[new_edge_count] = edge;
        ++new_edge_count;
    }
    g->edge_count = new_edge_count;
    g->update_bounds();
    return true;
}

// ----------------------------------------------------------------------------
// apply_group_inherited_visuals (issue 0035e)
// ----------------------------------------------------------------------------
//
// For every Group node in the graph, queries the distinct type values of its
// children (entities whose group_id is the group's id, excluding children
// that are themselves of type 'Group'). When exactly one non-empty type comes
// back, the Group node's `type_id` is reassigned to the matching entry of
// graph.types[] (matched by name, case-insensitively) and `shape_override`
// is set to SHAPE_SQUARE. Per the original note this keeps the distinctive
// container shape while the node takes the child type's colour and icon.
// Groups with mixed children, or whose child type is not in graph.types[],
// are left unchanged.
//
// Calling it again is harmless: a node that was already re-typed no longer
// carries the Group type_id and is skipped, which also means a changed child
// set only takes effect after the graph is reloaded.
//
// Returns false if `g` is null, `db_path` is null/empty, the database cannot
// be opened, or the child query cannot be prepared. Returns true otherwise,
// including the no-op cases (no nodes, no types, no `group_id` column, no
// "Group" type in graph.types[], or no Group nodes found).
bool apply_group_inherited_visuals(GraphData* g, const char* db_path) {
    if (g == nullptr || db_path == nullptr || *db_path == '\0') return false;
    if (g->node_count <= 0 || g->type_count <= 0) return true;

    sqlite3* conn = nullptr;
    if (sqlite3_open(db_path, &conn) != SQLITE_OK) {
        if (conn != nullptr) sqlite3_close(conn);
        return false;
    }
    if (!has_group_id_column(conn)) {
        sqlite3_close(conn);
        return true;
    }

    const std::string type_column = entity_type_column(conn);

    // Index of the "Group" type inside graph.types[] (first match wins).
    int group_type = -1;
    for (int t = 0; t < g->type_count && group_type < 0; ++t) {
        const char* name = g->types[t].name;
        if (name == nullptr) continue;
        if (std::strcmp(name, "Group") == 0 || std::strcmp(name, "group") == 0) {
            group_type = t;
        }
    }
    if (group_type < 0) {
        sqlite3_close(conn);
        return true;
    }

    // user_data (gf_fnv1a64 of the id) -> entity id, for Group entities only.
    // A failed prepare leaves the map empty, which ends up as a no-op below.
    std::unordered_map<uint64_t, std::string> group_id_by_hash;
    {
        const std::string sql = "SELECT id FROM entities WHERE " + type_column + " = 'Group'";
        sqlite3_stmt* stmt = nullptr;
        if (sqlite3_prepare_v2(conn, sql.c_str(), -1, &stmt, nullptr) == SQLITE_OK) {
            while (sqlite3_step(stmt) == SQLITE_ROW) {
                const unsigned char* raw = sqlite3_column_text(stmt, 0);
                if (raw == nullptr) continue;
                const std::string id = reinterpret_cast<const char*>(raw);
                group_id_by_hash[gf_fnv1a64(id.c_str())] = id;
            }
            sqlite3_finalize(stmt);
        }
    }

    // (node index, entity id) for every node that still carries the Group
    // type_id and resolves to a Group entity in the database.
    std::vector<std::pair<int, std::string>> targets;
    for (int n = 0; n < g->node_count; ++n) {
        if (g->nodes[n].type_id != static_cast<uint16_t>(group_type)) continue;
        const auto found = group_id_by_hash.find(g->nodes[n].user_data);
        if (found == group_id_by_hash.end()) continue;
        targets.emplace_back(n, found->second);
    }
    if (targets.empty()) {
        sqlite3_close(conn);
        return true;
    }

    // Distinct child types of one group. Children of type 'Group' are left
    // out (nested groups are out of scope per the original note).
    const std::string child_sql =
        "SELECT DISTINCT " + type_column + " FROM entities "
        "WHERE group_id = ? AND " + type_column + " != 'Group'";
    sqlite3_stmt* child_stmt = nullptr;
    if (sqlite3_prepare_v2(conn, child_sql.c_str(), -1, &child_stmt, nullptr) != SQLITE_OK) {
        sqlite3_close(conn);
        return false;
    }

    // Case-insensitive lookup of a type name in graph.types[]; -1 if absent.
    const auto type_index_of = [g](const std::string& wanted) -> int {
        for (int t = 0; t < g->type_count; ++t) {
            const char* candidate = g->types[t].name;
            if (candidate == nullptr) continue;
            if (std::strlen(candidate) != wanted.size()) continue;
            size_t k = 0;
            while (k < wanted.size() &&
                   std::tolower((unsigned char)wanted[k]) ==
                   std::tolower((unsigned char)candidate[k])) {
                ++k;
            }
            if (k == wanted.size()) return t;
        }
        return -1;
    };

    for (const auto& target : targets) {
        const int node_idx = target.first;
        const std::string& group_id = target.second;

        sqlite3_reset(child_stmt);
        sqlite3_clear_bindings(child_stmt);
        sqlite3_bind_text(child_stmt, 1, group_id.c_str(), -1, SQLITE_TRANSIENT);

        std::string first_type;
        int non_empty_rows = 0;
        bool mixed = false;
        while (!mixed && sqlite3_step(child_stmt) == SQLITE_ROW) {
            const unsigned char* raw = sqlite3_column_text(child_stmt, 0);
            if (raw == nullptr || *raw == '\0') continue; // NULL/empty types are ignored
            std::string child_type = reinterpret_cast<const char*>(raw);
            ++non_empty_rows;
            if (non_empty_rows == 1) {
                first_type = std::move(child_type);
            } else {
                mixed = (child_type != first_type);
            }
        }
        // Only a group with exactly one non-empty child type inherits.
        if (mixed || non_empty_rows != 1) continue;

        const int inherited = type_index_of(first_type);
        if (inherited < 0 || inherited == group_type) continue;
        g->nodes[node_idx].type_id = static_cast<uint16_t>(inherited);
        g->nodes[node_idx].shape_override = SHAPE_SQUARE;  // keep it square
    }
    sqlite3_finalize(child_stmt);
    sqlite3_close(conn);
    return true;
}

} // namespace ge