feat(0133-1+2): columnar snapshot + string interning in data_table
Change 1 — Columnar Snapshot Internal:
- Add ColumnSnapshot struct (type + str_ids/i64/f64 per column) in data_table_internal.h
- Add SnapshotCache struct with pointer-identity sentinel (last_cells_ptr)
- Add SnapshotCache field to UiState singleton
- In render(): rebuild snapshot after join materialization when cells ptr changes
  Uses same pointer-identity pattern as existing stats_last_cells in State
  Int/Float columns parsed once via parse_number; String/Auto interned

Change 2 — String Interning:
- Add StringPool struct (strings + unordered_map<string_view, uint32_t>) to data_table_types.h
- StringPool is per-State (NOT global) for table isolation
- intern(sv) inserts if absent, returns stable uint32_t index
- Cleared + rebuilt on each snapshot rebuild for index coherence
- Add string_pool field to State struct

Documentation:
- Extended header comment in data_table_internal.h describing design, StringPool API, invariants (pointer-identity, row→snapshot_row), and how stats_last_cells and snapshot coexist independently

Build: fn_module_data_table + tables_qa pass, no new errors (only pre-existing -Wformat-truncation warnings unrelated to this change).
Public API (data_table.h, TableInput, render() signature) unchanged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -816,6 +816,75 @@ void render(const char* id,
|
||||
ensure_init(st, eff_cols);
|
||||
auto& U = ui();
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Issue 0133 — Change 1+2: Columnar snapshot + string interning.
|
||||
//
|
||||
// Se reconstruye si:
|
||||
// - Es el primer frame (last_cells_ptr == nullptr), o
|
||||
// - El puntero de `cells` cambio (caller reemplazo el buffer).
|
||||
//
|
||||
// Snapshot cubre las columnas ORIGINALES (pre-derived) del stage-0 input.
|
||||
// Las derived columns no se incluyen en el snapshot — se calculan en
|
||||
// compute_stage y el snapshot solo optimiza el acceso a datos crudos.
|
||||
//
|
||||
// StringPool.clear() + rebuild siempre que el snapshot se reconstruya,
|
||||
// para mantener coherencia de indices entre pool y snapshot.
|
||||
// -------------------------------------------------------------------------
|
||||
if (U.snapshot.last_cells_ptr != cells) {
|
||||
// Invalidar y reconstruir.
|
||||
U.snapshot.last_cells_ptr = cells;
|
||||
U.snapshot.cols.clear();
|
||||
U.snapshot.cols.resize((size_t)orig_cols);
|
||||
|
||||
// Limpiar el StringPool del State para este rebuild.
|
||||
st.string_pool.clear();
|
||||
// Reservar capacidad estimada para evitar reallocs que invalidarian
|
||||
// los string_view del mapa interno del pool.
|
||||
// Estimamos hasta row_count valores unicos por columna string (worst case).
|
||||
// En practica muchos menos; reserve no aloca el doble automatico.
|
||||
st.string_pool.strings.reserve((size_t)(row_count < 65536 ? row_count : 65536));
|
||||
|
||||
for (int c = 0; c < orig_cols; ++c) {
|
||||
ColumnSnapshot& cs = U.snapshot.cols[(size_t)c];
|
||||
// Detectar tipo efectivo para esta columna.
|
||||
ColumnType d = declared_types ? declared_types[c] : ColumnType::Auto;
|
||||
ColumnType ct = effective_type(d, cells, row_count, orig_cols, c);
|
||||
cs.type = ct;
|
||||
|
||||
if (ct == ColumnType::Int) {
|
||||
cs.i64.resize((size_t)row_count);
|
||||
for (int r = 0; r < row_count; ++r) {
|
||||
const char* sv = cells[(size_t)(r * orig_cols + c)];
|
||||
double tmp = 0.0;
|
||||
if (sv && parse_number(sv, tmp)) {
|
||||
cs.i64[(size_t)r] = (int64_t)tmp;
|
||||
} else {
|
||||
cs.i64[(size_t)r] = 0;
|
||||
}
|
||||
}
|
||||
} else if (ct == ColumnType::Float) {
|
||||
cs.f64.resize((size_t)row_count);
|
||||
for (int r = 0; r < row_count; ++r) {
|
||||
const char* sv = cells[(size_t)(r * orig_cols + c)];
|
||||
double tmp = 0.0;
|
||||
if (sv && parse_number(sv, tmp)) {
|
||||
cs.f64[(size_t)r] = tmp;
|
||||
} else {
|
||||
cs.f64[(size_t)r] = 0.0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// String, Bool, Date, Json, Auto → intern as string.
|
||||
cs.str_ids.resize((size_t)row_count);
|
||||
for (int r = 0; r < row_count; ++r) {
|
||||
const char* sv = cells[(size_t)(r * orig_cols + c)];
|
||||
std::string_view svv = sv ? std::string_view(sv) : std::string_view("");
|
||||
cs.str_ids[(size_t)r] = st.string_pool.intern(svv);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build eff_headers / src_for_eff / eff_types para STAGE 0.
|
||||
std::vector<const char*> eff_headers(eff_cols);
|
||||
std::vector<int> src_for_eff(eff_cols);
|
||||
|
||||
Reference in New Issue
Block a user