#include "data_table_logic.h"

// NOTE(review): the standard header names were missing from the text under
// review. This list is reconstructed from the std:: facilities the file uses;
// confirm it against the original include list before merging.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

namespace data_table {

// Short label shown for a filter operator. Returns "?" for a value that is
// not one of the enumerators handled below.
const char* op_label(Op o) {
    const char* label = "?";
    switch (o) {
        case Op::Eq:          label = "=";         break;
        case Op::Neq:         label = "!=";        break;
        case Op::Gt:          label = ">";         break;
        case Op::Gte:         label = ">=";        break;
        case Op::Lt:          label = "<";         break;
        case Op::Lte:         label = "<=";        break;
        case Op::Contains:    label = "contains";  break;
        case Op::NotContains: label = "!contains"; break;
        case Op::StartsWith:  label = "starts";    break;
        case Op::EndsWith:    label = "ends";      break;
    }
    return label;
}

// True for the operators that only make sense on text (substring, prefix and
// suffix matching).
bool op_is_string_only(Op o) {
    switch (o) {
        case Op::Contains:
        case Op::NotContains:
        case Op::StartsWith:
        case Op::EndsWith:
            return true;
        default:
            return false;
    }
}

// Lower-case name of a column type. Returns "?" for a value that is not one
// of the enumerators handled below.
const char* column_type_name(ColumnType t) {
    const char* name = "?";
    switch (t) {
        case ColumnType::Auto:   name = "auto";   break;
        case ColumnType::String: name = "string"; break;
        case ColumnType::Int:    name = "int";    break;
        case ColumnType::Float:  name = "float";  break;
        case ColumnType::Bool:   name = "bool";   break;
        case ColumnType::Date:   name = "date";   break;
        case ColumnType::Json:   name = "json";   break;
    }
    return name;
}

// Tabler icons (UTF-8). Kept as string literals so that icons_tabler.h does
// not have to be included here.
const char* column_type_icon(ColumnType t) { switch (t) { case ColumnType::Auto: return "\xef\xa4\x9d"; // TI_HELP_CIRCLE case ColumnType::String: return "\xef\x95\xa7"; // TI_ABC case ColumnType::Int: return "\xef\x95\x94"; // TI_123 case ColumnType::Float: return "\xef\xa8\xa6"; // TI_DECIMAL case ColumnType::Bool: return "\xee\xae\xa6"; // TI_CHECKBOX case ColumnType::Date: return "\xee\xa9\x93"; // TI_CALENDAR case ColumnType::Json: return "\xee\xaf\x8c"; // TI_BRACES } return "?"; } std::vector ops_for_type(ColumnType t) { switch (t) { case ColumnType::Int: case ColumnType::Float: case ColumnType::Date: return {Op::Eq, Op::Neq, Op::Gt, Op::Gte, Op::Lt, Op::Lte}; case ColumnType::Bool: return {Op::Eq, Op::Neq}; case ColumnType::Json: return {Op::Eq, Op::Neq, Op::Contains, Op::NotContains}; case ColumnType::String: return {Op::Eq, Op::Neq, Op::Contains, Op::NotContains, Op::StartsWith, Op::EndsWith}; case ColumnType::Auto: default: return {Op::Eq, Op::Neq, Op::Contains, Op::NotContains}; } } namespace { bool is_bool_text(const char* s) { return std::strcmp(s, "true") == 0 || std::strcmp(s, "false") == 0; } bool is_date_iso(const char* s) { // YYYY-MM-DD minimo if (std::strlen(s) < 10) return false; auto d = [](char c){ return c >= '0' && c <= '9'; }; return d(s[0]) && d(s[1]) && d(s[2]) && d(s[3]) && s[4] == '-' && d(s[5]) && d(s[6]) && s[7] == '-' && d(s[8]) && d(s[9]); } bool is_json_text(const char* s) { while (*s == ' ' || *s == '\t') ++s; return *s == '{' || *s == '['; } bool is_integer_text(const char* s) { if (!*s) return false; if (*s == '-' || *s == '+') ++s; if (!*s) return false; for (; *s; ++s) if (*s < '0' || *s > '9') return false; return true; } } // anon ColumnType auto_detect_type(const char* const* cells, int rows, int cols, int col, int sample_n) { if (col < 0 || col >= cols) return ColumnType::String; int n_total = 0, n_int = 0, n_float = 0, n_bool = 0, n_date = 0, n_json = 0; for (int r = 0; r < rows && n_total < sample_n; ++r) { const char* 
c = cells[r * cols + col]; if (!c || !*c) continue; n_total++; if (is_bool_text(c)) { n_bool++; continue; } if (is_date_iso(c)) { n_date++; continue; } if (is_json_text(c)) { n_json++; continue; } double v; if (parse_number(c, v)) { if (is_integer_text(c)) n_int++; else n_float++; continue; } // string: no se cuenta a ningun tipo -> garantiza fallthrough a String } if (n_total == 0) return ColumnType::String; if (n_bool == n_total) return ColumnType::Bool; if (n_date == n_total) return ColumnType::Date; if (n_json == n_total) return ColumnType::Json; if (n_int + n_float == n_total) return (n_float > 0) ? ColumnType::Float : ColumnType::Int; return ColumnType::String; } ColumnType effective_type(ColumnType declared, const char* const* cells, int rows, int cols, int col) { if (declared != ColumnType::Auto) return declared; return auto_detect_type(cells, rows, cols, col); } bool parse_number(const char* s, double& out) { if (!s || !*s) return false; char* end = nullptr; double v = std::strtod(s, &end); if (end == s) return false; while (*end == ' ' || *end == '\t') end++; if (*end != '\0') return false; out = v; return true; } bool compare(const char* a, const char* b, Op op) { if (!a) a = ""; if (!b) b = ""; // Ops solo de string (siempre lexical, no intentan numeric). 
switch (op) { case Op::Contains: return std::strstr(a, b) != nullptr; case Op::NotContains: return std::strstr(a, b) == nullptr; case Op::StartsWith: { size_t lb = std::strlen(b); return std::strncmp(a, b, lb) == 0; } case Op::EndsWith: { size_t la = std::strlen(a), lb = std::strlen(b); return lb <= la && std::strcmp(a + la - lb, b) == 0; } default: break; } double na, nb; bool numeric = parse_number(a, na) && parse_number(b, nb); if (numeric) { switch (op) { case Op::Eq: return na == nb; case Op::Neq: return na != nb; case Op::Gt: return na > nb; case Op::Gte: return na >= nb; case Op::Lt: return na < nb; case Op::Lte: return na <= nb; default: break; } } int c = std::strcmp(a, b); switch (op) { case Op::Eq: return c == 0; case Op::Neq: return c != 0; case Op::Gt: return c > 0; case Op::Gte: return c >= 0; case Op::Lt: return c < 0; case Op::Lte: return c <= 0; default: break; } return false; } // Helpers de State para acceso a stages. void State::ensure_stage0() { if (stages.empty()) stages.push_back(Stage{}); if (active_stage < 0) active_stage = 0; if (active_stage >= (int)stages.size()) active_stage = (int)stages.size() - 1; } Stage& State::raw() { ensure_stage0(); return stages[0]; } const Stage& State::raw() const { static thread_local Stage empty; if (stages.empty()) return empty; return stages[0]; } Stage& State::active() { ensure_stage0(); return stages[active_stage]; } const Stage& State::active_const() const { static thread_local Stage empty; if (stages.empty()) return empty; int a = active_stage; if (a < 0 || a >= (int)stages.size()) a = 0; return stages[a]; } // Compatibilidad: aplica filters + primer sort del stage 0 (Raw). Si el state // no tiene stages, devuelve todas las filas sin filtrar. Util para tests y // para el render path actual (que solo opera sobre Raw cuando no hay grouping). 
std::vector compute_visible_rows(const char* const* cells, int rows, int cols, const State& st) { std::vector out; out.reserve(rows); const Stage& s = st.raw(); for (int r = 0; r < rows; ++r) { bool keep = true; for (const auto& f : s.filters) { if (f.col < 0 || f.col >= cols) continue; const char* cell = cells[r * cols + f.col]; if (!compare(cell, f.value.c_str(), f.op)) { keep = false; break; } } if (keep) out.push_back(r); } if (!s.sorts.empty()) { // El stage 0 stores sorts as {col_name, desc}. Para compat, si el // nombre es vacio o "@idx", interpretamos como indice numerico. const SortClause& sc0 = s.sorts.front(); int sc = -1; // Permitir nombre numerico estilo "@idx" o lookup posicional via // primer caracter '@'. Sino, busqueda por header no posible aqui // (no tenemos headers) — devuelve sin sort. Para compat de tests // usamos nombre "@N" donde N es indice 0-based. if (!sc0.col.empty() && sc0.col[0] == '@') { sc = std::atoi(sc0.col.c_str() + 1); } bool desc = sc0.desc; if (sc >= 0 && sc < cols) { std::sort(out.begin(), out.end(), [&](int a, int b) { const char* ca = cells[a * cols + sc]; const char* cb = cells[b * cols + sc]; if (!ca) ca = ""; if (!cb) cb = ""; double na, nb; bool num = parse_number(ca, na) && parse_number(cb, nb); int cmp; if (num) cmp = (na < nb) ? -1 : (na > nb ? 1 : 0); else cmp = std::strcmp(ca, cb); return desc ? (cmp > 0) : (cmp < 0); }); } } return out; } ColStats compute_column_stats(const char* const* cells, int rows, int cols, int col, int unique_cap, const int* indices, int n_indices) { ColStats s; if (col < 0 || col >= cols) return s; bool use_idx = (indices != nullptr && n_indices > 0); int n = use_idx ? n_indices : rows; s.total = n; std::unordered_map counts; if (unique_cap > 0) counts.reserve(std::min(unique_cap, n)); bool all_numeric = true; std::vector nums; nums.reserve(n); for (int i = 0; i < n; ++i) { int r = use_idx ? 
indices[i] : i; if (r < 0 || r >= rows) continue; const char* c = cells[r * cols + col]; if (!c || !*c) { s.empty_count++; continue; } double v; if (parse_number(c, v)) { if (s.numeric_count == 0) { s.min = v; s.max = v; } else { if (v < s.min) s.min = v; if (v > s.max) s.max = v; } s.sum += v; s.numeric_count++; nums.push_back(v); } else { all_numeric = false; } if (unique_cap == 0 || (int)counts.size() < unique_cap) { counts[c]++; } else { auto it = counts.find(c); if (it != counts.end()) it->second++; else s.unique_capped = true; } } s.unique_count = (int)counts.size(); s.numeric = all_numeric && s.numeric_count > 0; if (s.numeric_count > 0) s.mean = s.sum / s.numeric_count; // Top 8 categorias por count desc. if (!counts.empty()) { std::vector> v(counts.begin(), counts.end()); int topN = std::min(8, (int)v.size()); std::partial_sort(v.begin(), v.begin() + topN, v.end(), [](const auto& a, const auto& b){ return a.second > b.second; }); v.resize(topN); s.top_categories = std::move(v); } if (s.numeric && !nums.empty()) { std::sort(nums.begin(), nums.end()); auto pct = [&](double p) { double idx = p * (nums.size() - 1); size_t lo = (size_t)idx; size_t hi = std::min(lo + 1, nums.size() - 1); double t = idx - lo; return nums[lo] * (1.0 - t) + nums[hi] * t; }; s.p25 = pct(0.25); s.p50 = pct(0.50); s.p75 = pct(0.75); s.hist.assign(HIST_BINS, 0.0f); double range = s.max - s.min; if (range <= 0) { s.hist[HIST_BINS / 2] = (float)nums.size(); } else { for (double v : nums) { int b = (int)((v - s.min) / range * HIST_BINS); if (b < 0) b = 0; if (b >= HIST_BINS) b = HIST_BINS - 1; s.hist[b] += 1.0f; } } } return s; } void reorder_column(State& st, int src, int dst) { if (src == dst) return; auto it_s = std::find(st.col_order.begin(), st.col_order.end(), src); auto it_d = std::find(st.col_order.begin(), st.col_order.end(), dst); if (it_s == st.col_order.end() || it_d == st.col_order.end()) return; int si = (int)(it_s - st.col_order.begin()); int di = (int)(it_d - 
st.col_order.begin()); int v = st.col_order[si]; st.col_order.erase(st.col_order.begin() + si); // Insertar en `di`: cubre ambos sentidos. Para si insert(di) lo // coloca al final de la posicion logica original de dst. Para si>di // (drag izquierda) dst sigue en di y src queda antes. if (di > (int)st.col_order.size()) di = (int)st.col_order.size(); st.col_order.insert(st.col_order.begin() + di, v); } std::string csv_escape(const char* s) { if (!s) return ""; bool needs = false; for (const char* p = s; *p; ++p) { if (*p == ',' || *p == '"' || *p == '\n' || *p == '\r') { needs = true; break; } } if (!needs) return std::string(s); std::string out; out.reserve(std::strlen(s) + 4); out += '"'; for (const char* p = s; *p; ++p) { if (*p == '"') out += '"'; out += *p; } out += '"'; return out; } namespace { std::string tsv_sanitize(const char* s) { std::string out; if (!s) return out; out.reserve(std::strlen(s)); for (const char* p = s; *p; ++p) { char ch = *p; if (ch == '\t' || ch == '\n' || ch == '\r') ch = ' '; out += ch; } return out; } } // anon std::string build_tsv(const char* const* cells, int rows, int cols, const char* const* headers, const std::vector& col_order, const std::vector& col_visible, const std::vector& visible_rows, int view_row_lo, int view_row_hi, int view_col_lo, int view_col_hi) { if (col_order.empty() || visible_rows.empty()) return ""; int rmin = std::min(view_row_lo, view_row_hi); int rmax = std::max(view_row_lo, view_row_hi); int cmin = std::min(view_col_lo, view_col_hi); int cmax = std::max(view_col_lo, view_col_hi); rmin = std::max(0, rmin); rmax = std::min((int)visible_rows.size() - 1, rmax); cmin = std::max(0, cmin); cmax = std::min((int)col_order.size() - 1, cmax); std::string out; bool first = true; for (int oc = cmin; oc <= cmax; ++oc) { int c = col_order[oc]; if (c < 0 || c >= cols) continue; if (c < (int)col_visible.size() && !col_visible[c]) continue; if (!first) out += '\t'; out += tsv_sanitize(headers[c]); first = false; } out += 
'\n'; for (int ri = rmin; ri <= rmax; ++ri) { int r = visible_rows[ri]; first = true; for (int oc = cmin; oc <= cmax; ++oc) { int c = col_order[oc]; if (c < 0 || c >= cols) continue; if (c < (int)col_visible.size() && !col_visible[c]) continue; if (!first) out += '\t'; out += tsv_sanitize(cells[r * cols + c]); first = false; } out += '\n'; } return out; } std::string build_csv(const char* const* cells, int rows, int cols, const char* const* headers, const std::vector& col_order, const std::vector& col_visible, const std::vector& visible_rows) { if (col_order.empty()) return ""; std::string out; bool first = true; for (int oc = 0; oc < (int)col_order.size(); ++oc) { int c = col_order[oc]; if (c < 0 || c >= cols) continue; if (c < (int)col_visible.size() && !col_visible[c]) continue; if (!first) out += ','; out += csv_escape(headers[c]); first = false; } out += '\n'; for (int r : visible_rows) { first = true; for (int oc = 0; oc < (int)col_order.size(); ++oc) { int c = col_order[oc]; if (c < 0 || c >= cols) continue; if (c < (int)col_visible.size() && !col_visible[c]) continue; if (!first) out += ','; out += csv_escape(cells[r * cols + c]); first = false; } out += '\n'; } return out; } int find_open_bracket(const char* buf, int len, int cursor, std::string& filter_text) { filter_text.clear(); if (!buf || cursor <= 0 || cursor > len) return -1; for (int i = cursor - 1; i >= 0; --i) { char c = buf[i]; if (c == ']' || c == '\n') return -1; // already closed or new line if (c == '[') { filter_text.assign(buf + i + 1, cursor - i - 1); return i; } } return -1; } std::string insert_column_ref(const std::string& src, int start, int cursor, const std::string& name, int& new_cursor) { if (start < 0 || start > (int)src.size() || cursor < start || cursor > (int)src.size()) { new_cursor = cursor; return src; } std::string replacement = "[" + name + "]"; std::string out; out.reserve(src.size() - (cursor - start) + replacement.size()); out.append(src, 0, start); out += replacement; 
out.append(src, cursor, std::string::npos); new_cursor = start + (int)replacement.size(); return out; } // ---------------------------------------------------------------------------- // TQL stage compute // ---------------------------------------------------------------------------- const char* agg_fn_name(AggFn f) { switch (f) { case AggFn::Count: return "count"; case AggFn::Sum: return "sum"; case AggFn::Avg: return "avg"; case AggFn::Min: return "min"; case AggFn::Max: return "max"; case AggFn::Distinct: return "distinct"; case AggFn::Stddev: return "stddev"; case AggFn::Median: return "median"; case AggFn::P25: return "p25"; case AggFn::P75: return "p75"; case AggFn::P90: return "p90"; case AggFn::P99: return "p99"; case AggFn::Percentile: return "percentile"; } return "?"; } std::string aggregation_alias(const Aggregation& a) { if (!a.alias.empty()) return a.alias; if (a.fn == AggFn::Count) return "count"; if (a.fn == AggFn::Percentile) { int pct = (int)(a.arg * 100.0 + 0.5); char buf[128]; std::snprintf(buf, sizeof(buf), "p%d_%s", pct, a.col.c_str()); return buf; } std::string out = agg_fn_name(a.fn); out += '_'; out += a.col; return out; } ColumnType aggregation_type(const Aggregation& a, const std::vector& in_headers, const std::vector& in_types) { if (a.fn == AggFn::Count || a.fn == AggFn::Distinct) return ColumnType::Int; if (a.fn == AggFn::Min || a.fn == AggFn::Max) { for (size_t i = 0; i < in_headers.size(); ++i) { if (in_headers[i] == a.col && i < in_types.size()) return in_types[i]; } return ColumnType::String; } return ColumnType::Float; } Filter make_drill_filter(int col_idx, const std::string& value) { Filter f; f.col = col_idx; f.op = Op::Eq; f.value = value; return f; } bool apply_drill_step(State& st, const DrillStep& step) { if (step.target_stage < 0 || step.target_stage >= (int)st.stages.size()) return false; Stage& s = st.stages[step.target_stage]; int pos = step.filter_pos; if (pos < 0 || pos > (int)s.filters.size()) return false; 
s.filters.insert(s.filters.begin() + pos, step.added); st.active_stage = step.target_stage; return true; } bool drill_up(State& st) { if (st.stages.empty()) return false; if (st.active_stage <= 0) return false; st.active_stage -= 1; return true; } std::string row_to_tsv(const char* const* cells, int rows, int cols, int row_idx, const std::vector& headers) { if (row_idx < 0 || row_idx >= rows || cols <= 0) return ""; std::string out; for (int c = 0; c < cols; ++c) { if (c > 0) out += '\t'; if (c < (int)headers.size()) out += headers[c]; } out += "\r\n"; for (int c = 0; c < cols; ++c) { if (c > 0) out += '\t'; const char* v = cells[row_idx * cols + c]; if (v) out += v; } out += "\r\n"; return out; } std::vector build_filters_from_row(const char* const* cells, int rows, int cols, int row_idx) { std::vector out; if (row_idx < 0 || row_idx >= rows || cols <= 0) return out; for (int c = 0; c < cols; ++c) { const char* v = cells[row_idx * cols + c]; if (!v || !*v) continue; Filter f; f.col = c; f.op = Op::Eq; f.value = v; out.push_back(f); } return out; } bool undo_drill_step(State& st, const DrillStep& step) { if (step.target_stage < 0 || step.target_stage >= (int)st.stages.size()) return false; Stage& s = st.stages[step.target_stage]; int pos = step.filter_pos; if (pos < 0 || pos >= (int)s.filters.size()) return false; s.filters.erase(s.filters.begin() + pos); if (step.prev_active_stage >= 0 && step.prev_active_stage < (int)st.stages.size()) { st.active_stage = step.prev_active_stage; } return true; } std::vector apply_filters(const char* const* cells, int rows, int cols, const std::vector& filters) { std::vector out; out.reserve(rows); for (int r = 0; r < rows; ++r) { bool keep = true; for (const auto& f : filters) { if (f.col < 0 || f.col >= cols) continue; const char* cell = cells[r * cols + f.col]; if (!compare(cell, f.value.c_str(), f.op)) { keep = false; break; } } if (keep) out.push_back(r); } return out; } namespace { int find_col(const std::vector& headers, 
const std::string& name) { for (size_t i = 0; i < headers.size(); ++i) if (headers[i] == name) return (int)i; return -1; } // Compara dos cells para sort: numerico si ambos parseables, sino lexical. int cmp_cells(const char* a, const char* b) { if (!a) a = ""; if (!b) b = ""; double na, nb; bool num = parse_number(a, na) && parse_number(b, nb); if (num) return (na < nb) ? -1 : (na > nb ? 1 : 0); return std::strcmp(a, b); } void apply_sorts(std::vector& row_idx, const char* const* cells, int cols, const std::vector& headers, const std::vector& sorts) { if (sorts.empty()) return; std::vector sort_cols(sorts.size()); for (size_t i = 0; i < sorts.size(); ++i) sort_cols[i] = find_col(headers, sorts[i].col); std::sort(row_idx.begin(), row_idx.end(), [&](int a, int b){ for (size_t i = 0; i < sorts.size(); ++i) { int sc = sort_cols[i]; if (sc < 0) continue; int c = cmp_cells(cells[a * cols + sc], cells[b * cols + sc]); if (c != 0) return sorts[i].desc ? (c > 0) : (c < 0); } return false; }); } double percentile_value(std::vector& v, double p) { if (v.empty()) return 0.0; std::sort(v.begin(), v.end()); double idx = p * (v.size() - 1); size_t lo = (size_t)idx; size_t hi = std::min(lo + 1, v.size() - 1); double t = idx - lo; return v[lo] * (1.0 - t) + v[hi] * t; } double compute_agg_numeric(AggFn fn, std::vector& vals, double arg) { if (vals.empty()) return 0.0; switch (fn) { case AggFn::Sum: { double s = 0; for (double v : vals) s += v; return s; } case AggFn::Avg: { double s = 0; for (double v : vals) s += v; return s / vals.size(); } case AggFn::Min: { double m = vals[0]; for (double v : vals) if (v < m) m = v; return m; } case AggFn::Max: { double m = vals[0]; for (double v : vals) if (v > m) m = v; return m; } case AggFn::Stddev: { double s = 0; for (double v : vals) s += v; double mean = s / vals.size(); double var = 0; for (double v : vals) { double d = v - mean; var += d * d; } return std::sqrt(var / vals.size()); } case AggFn::Median: return percentile_value(vals, 
0.50); case AggFn::P25: return percentile_value(vals, 0.25); case AggFn::P75: return percentile_value(vals, 0.75); case AggFn::P90: return percentile_value(vals, 0.90); case AggFn::P99: return percentile_value(vals, 0.99); case AggFn::Percentile: return percentile_value(vals, arg); default: return 0.0; } } std::string format_double(double v) { char buf[64]; long long iv = (long long)v; if ((double)iv == v) std::snprintf(buf, sizeof(buf), "%lld", iv); else std::snprintf(buf, sizeof(buf), "%.4g", v); return buf; } } // anon StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols, const std::vector& in_headers, const std::vector& in_types, const Stage& stage) { StageOutput out; auto visible = apply_filters(in_cells, in_rows, in_cols, stage.filters); bool grouped = !stage.breakouts.empty() || !stage.aggregations.empty(); if (!grouped) { // Passthrough: misma forma, filtrado + ordenado. out.cols = in_cols; out.headers = in_headers; out.types = in_types; // Sort sobre visible. apply_sorts(visible, in_cells, in_cols, in_headers, stage.sorts); out.rows = (int)visible.size(); out.cells.reserve((size_t)out.rows * in_cols); for (int r : visible) { for (int c = 0; c < in_cols; ++c) out.cells.push_back(in_cells[r * in_cols + c]); } return out; } // Grouped: agrupa visible por valores de breakout, calcula aggregations. // Breakouts pueden llevar sufijo `:granularity` para cols Date (fase 10). int nbreaks = (int)stage.breakouts.size(); std::vector break_cols(nbreaks); std::vector break_grans(nbreaks); bool any_trunc = false; for (int i = 0; i < nbreaks; ++i) { std::string col_name; break_grans[i] = parse_breakout_granularity(stage.breakouts[i], col_name); if (break_grans[i] != DateGranularity::None) any_trunc = true; break_cols[i] = find_col(in_headers, col_name); } // Pre-truncate solo cuando hay granularity activa. Strings persistidos en // out.cell_backing para que los punteros sobrevivan al return de la funcion. 
// Reservamos upfront para que push_back no invalide punteros anteriores. // Tamaño = trunc cells + aggregation cells (peor caso n_groups <= in_rows). out.cell_backing.reserve( (size_t)in_rows * (size_t)nbreaks + (size_t)in_rows * stage.aggregations.size() + 16); std::vector trunc_ptrs; if (any_trunc) { trunc_ptrs.assign((size_t)in_rows * (size_t)nbreaks, nullptr); for (int r = 0; r < in_rows; ++r) { for (int i = 0; i < nbreaks; ++i) { if (break_grans[i] == DateGranularity::None) continue; int bc = break_cols[i]; if (bc < 0) continue; const char* v = in_cells[r * in_cols + bc]; out.cell_backing.emplace_back( truncate_date(v ? v : "", break_grans[i])); trunc_ptrs[(size_t)r * nbreaks + i] = out.cell_backing.back().c_str(); } } } auto cell_for = [&](int r, int i) -> const char* { int bc = break_cols[i]; if (bc < 0) return ""; if (break_grans[i] != DateGranularity::None) { return trunc_ptrs[(size_t)r * nbreaks + i]; } const char* v = in_cells[r * in_cols + bc]; return v ? v : ""; }; auto make_key = [&](int r) -> std::string { std::string k; for (int i = 0; i < nbreaks; ++i) { if (i > 0) k += '\x1f'; // separador unit-separator (no aparece en datos) k += cell_for(r, i); } return k; }; // Mantenemos orden de aparicion para estabilidad pre-sort. std::unordered_map key_to_group; std::vector group_keys; // canonical, no usado salvo debug std::vector> group_rows; // indices en in_cells por grupo std::vector> group_breakvals; // valores break por grupo for (int r : visible) { std::string k = make_key(r); auto it = key_to_group.find(k); int gi; if (it == key_to_group.end()) { gi = (int)group_rows.size(); key_to_group.emplace(k, gi); group_keys.push_back(k); group_rows.emplace_back(); std::vector bv((size_t)nbreaks, ""); for (int i = 0; i < nbreaks; ++i) { bv[i] = cell_for(r, i); } group_breakvals.push_back(std::move(bv)); } else gi = it->second; group_rows[gi].push_back(r); } // Headers + types del output: breakouts + aggregation aliases. 
int out_cols = (int)stage.breakouts.size() + (int)stage.aggregations.size(); out.cols = out_cols; out.headers.reserve(out_cols); out.types.reserve(out_cols); for (int i = 0; i < nbreaks; ++i) { out.headers.push_back(stage.breakouts[i]); int bc = break_cols[i]; // Si hay granularity activa, el output es String (formato ymd o similar), // no la fecha original. ColumnType ot = ColumnType::String; if (break_grans[i] == DateGranularity::None && bc >= 0 && bc < (int)in_types.size()) { ot = in_types[bc]; } out.types.push_back(ot); } for (const auto& a : stage.aggregations) { out.headers.push_back(aggregation_alias(a)); out.types.push_back(aggregation_type(a, in_headers, in_types)); } // Compute aggregation values por grupo. Reservamos backing con tamaño exacto // para que los punteros .c_str() no se invaliden. int n_groups = (int)group_rows.size(); out.cell_backing.reserve((size_t)n_groups * stage.aggregations.size() + 16); auto store_backing = [&](const std::string& s) -> const char* { out.cell_backing.push_back(s); return out.cell_backing.back().c_str(); }; // Construimos cells por grupo (filas no ordenadas todavia). std::vector flat; flat.reserve((size_t)n_groups * out_cols); for (int gi = 0; gi < n_groups; ++gi) { // breakout values: punteros directos a in_cells (estables). 
for (size_t i = 0; i < stage.breakouts.size(); ++i) { flat.push_back(group_breakvals[gi][i]); } // aggregations for (const auto& a : stage.aggregations) { if (a.fn == AggFn::Count) { flat.push_back(store_backing(format_double((double)group_rows[gi].size()))); continue; } if (a.fn == AggFn::Distinct) { int ac = find_col(in_headers, a.col); if (ac < 0) { flat.push_back(store_backing("0")); continue; } std::unordered_set uniq; for (int r : group_rows[gi]) { const char* v = in_cells[r * in_cols + ac]; if (v && *v) uniq.insert(v); } flat.push_back(store_backing(format_double((double)uniq.size()))); continue; } int ac = find_col(in_headers, a.col); if (ac < 0) { flat.push_back(store_backing("")); continue; } // min/max sobre strings preserva tipo if ((a.fn == AggFn::Min || a.fn == AggFn::Max) && ac < (int)in_types.size() && (in_types[ac] == ColumnType::String || in_types[ac] == ColumnType::Date)) { const char* best = nullptr; for (int r : group_rows[gi]) { const char* v = in_cells[r * in_cols + ac]; if (!v || !*v) continue; if (!best) { best = v; continue; } int c = std::strcmp(v, best); if ((a.fn == AggFn::Min && c < 0) || (a.fn == AggFn::Max && c > 0)) best = v; } flat.push_back(best ? best : store_backing("")); continue; } std::vector vals; vals.reserve(group_rows[gi].size()); for (int r : group_rows[gi]) { const char* v = in_cells[r * in_cols + ac]; if (!v || !*v) continue; double d; if (parse_number(v, d)) vals.push_back(d); } double agg_val = compute_agg_numeric(a.fn, vals, a.arg); flat.push_back(store_backing(format_double(agg_val))); } } // Sort sobre los n_groups segun stage.sorts (col-name lookup en out.headers). 
std::vector grp_idx(n_groups); for (int i = 0; i < n_groups; ++i) grp_idx[i] = i; apply_sorts(grp_idx, flat.data(), out_cols, out.headers, stage.sorts); out.rows = n_groups; out.cells.reserve((size_t)n_groups * out_cols); for (int gi : grp_idx) { for (int c = 0; c < out_cols; ++c) { out.cells.push_back(flat[gi * out_cols + c]); } } return out; } // ---------------------------------------------------------------------------- // ViewMode helpers // ---------------------------------------------------------------------------- struct ViewModeInfo { ViewMode m; const char* token; const char* label; int min_cols; bool needs_num; bool needs_cat; bool needs_agg; }; static const ViewModeInfo kViewModes[] = { { ViewMode::Table, "table", "Table", 1, false, false, false }, { ViewMode::Bar, "bar", "Bar (horizontal)", 2, true, true, true }, { ViewMode::Column, "column", "Column (vertical)", 2, true, true, true }, { ViewMode::GroupedBar, "grouped_bar", "Grouped bar", 2, true, true, true }, { ViewMode::StackedBar, "stacked_bar", "Stacked bar", 2, true, true, true }, { ViewMode::Line, "line", "Line", 1, true, false, false }, { ViewMode::Area, "area", "Area", 1, true, false, false }, { ViewMode::Stairs, "stairs", "Stairs", 1, true, false, false }, { ViewMode::Scatter, "scatter", "Scatter", 2, true, false, false }, { ViewMode::Bubble, "bubble", "Bubble", 3, true, false, false }, { ViewMode::Histogram, "histogram", "Histogram", 1, true, false, false }, { ViewMode::Histogram2D, "hist2d", "Histogram 2D", 2, true, false, false }, { ViewMode::Heatmap, "heatmap", "Heatmap", 1, true, false, false }, { ViewMode::BoxPlot, "boxplot", "Box plot", 2, true, true, false }, { ViewMode::Stem, "stem", "Stem", 1, true, false, false }, { ViewMode::ErrorBars, "errorbars", "Error bars", 2, true, false, false }, { ViewMode::Pie, "pie", "Pie", 2, true, true, true }, { ViewMode::Donut, "donut", "Donut", 2, true, true, true }, { ViewMode::Funnel, "funnel", "Funnel", 2, true, true, true }, { 
ViewMode::Waterfall, "waterfall", "Waterfall", 1, true, false, true }, { ViewMode::KPI, "kpi", "KPI (single)", 1, true, false, true }, { ViewMode::KPIGrid, "kpi_grid", "KPI grid", 1, true, false, true }, { ViewMode::Candlestick, "candlestick", "Candlestick (OHLC)", 4, true, false, false }, { ViewMode::Radar, "radar", "Radar", 2, true, true, false }, }; static const int kViewModesN = (int)(sizeof(kViewModes) / sizeof(kViewModes[0])); const char* view_mode_token(ViewMode m) { for (int i = 0; i < kViewModesN; ++i) if (kViewModes[i].m == m) return kViewModes[i].token; return "table"; } const char* view_mode_label(ViewMode m) { for (int i = 0; i < kViewModesN; ++i) if (kViewModes[i].m == m) return kViewModes[i].label; return "Table"; } ViewMode view_mode_from_token(const char* s) { if (!s) return ViewMode::Table; for (int i = 0; i < kViewModesN; ++i) { if (std::strcmp(kViewModes[i].token, s) == 0) return kViewModes[i].m; } return ViewMode::Table; } int view_mode_min_cols(ViewMode m) { for (int i = 0; i < kViewModesN; ++i) if (kViewModes[i].m == m) return kViewModes[i].min_cols; return 1; } bool view_mode_needs_numeric(ViewMode m) { for (int i = 0; i < kViewModesN; ++i) if (kViewModes[i].m == m) return kViewModes[i].needs_num; return false; } bool view_mode_needs_category(ViewMode m) { for (int i = 0; i < kViewModesN; ++i) if (kViewModes[i].m == m) return kViewModes[i].needs_cat; return false; } bool view_mode_needs_aggregation(ViewMode m) { for (int i = 0; i < kViewModesN; ++i) if (kViewModes[i].m == m) return kViewModes[i].needs_agg; return false; } const ViewMode* all_view_modes(int* n_out) { static ViewMode arr[64]; static bool init = false; if (!init) { for (int i = 0; i < kViewModesN; ++i) arr[i] = kViewModes[i].m; init = true; } if (n_out) *n_out = kViewModesN; return arr; } // ---------------------------------------------------------------------------- // Joins // ---------------------------------------------------------------------------- int 
resolve_main_idx(const std::vector& tables, const std::string& main_source) {
    // Picks the index of the table whose `name` equals `main_source`.
    // Returns -1 when `tables` is empty; otherwise falls back to index 0 when
    // `main_source` is empty or matches no table.
    if (tables.empty()) return -1;
    if (main_source.empty()) return 0;
    for (size_t i = 0; i < tables.size(); ++i) {
        if (tables[i].name == main_source) return (int)i;
    }
    return 0;
}

// NOTE(review): the std::vector / std::unordered_map declarations in this
// section carry no template arguments as written — confirm the element types
// against the header before building.

// Serialization token for a join strategy; "left" for any unlisted value.
const char* join_strategy_token(JoinStrategy s) {
    switch (s) {
        case JoinStrategy::Left: return "left";
        case JoinStrategy::Inner: return "inner";
        case JoinStrategy::Right: return "right";
        case JoinStrategy::Full: return "full";
    }
    return "left";
}

// Inverse of join_strategy_token. Null, "left" and any unknown token all map
// to JoinStrategy::Left.
JoinStrategy join_strategy_from_token(const char* s) {
    if (!s) return JoinStrategy::Left;
    if (std::strcmp(s, "inner") == 0) return JoinStrategy::Inner;
    if (std::strcmp(s, "right") == 0) return JoinStrategy::Right;
    if (std::strcmp(s, "full") == 0) return JoinStrategy::Full;
    return JoinStrategy::Left;
}

// Human-readable label for a join strategy; "left-join" for any unlisted value.
const char* join_strategy_label(JoinStrategy s) {
    switch (s) {
        case JoinStrategy::Left: return "left-join";
        case JoinStrategy::Inner: return "inner-join";
        case JoinStrategy::Right: return "right-join";
        case JoinStrategy::Full: return "full-join";
    }
    return "left-join";
}

namespace {

// Index of the first header equal to `name`, or -1 when there is none.
int find_col_idx(const std::vector& hdrs, const std::string& name) {
    for (size_t i = 0; i < hdrs.size(); ++i) if (hdrs[i] == name) return (int)i;
    return -1;
}

// Builds the lookup key of one row: the text of each key column followed by
// the 0x1f separator. `cells` is indexed row-major (row * cols + c). A null
// cell contributes an empty string; a key column outside [0, cols)
// contributes the placeholder "\x1f|".
// NOTE(review): because the placeholder is the same on both sides, a key pair
// that resolves on neither table makes every left row match every right row —
// confirm this is intended.
std::string make_key(const char* const* cells, int row, int cols, const std::vector& key_cols) {
    std::string k;
    for (int c : key_cols) {
        if (c < 0 || c >= cols) { k += "\x1f|"; continue; }
        const char* s = cells[row * cols + c];
        k += (s ? s : "");
        k += "\x1f"; // separator
    }
    return k;
}

} // anon

// Joins a left table (row-major `left_cells`, left_rows x left_cols) with
// `right` on the column-name pairs in `jn.on`, using a hash index over the
// right rows.
//
// Output columns are all left columns followed by the selected right columns
// (`jn.fields`, or every right column when that list is empty); right headers
// are prefixed with "<alias>." when `jn.alias` is non-empty. Output rows are
// the matched pairs in left-row order, plus unmatched left rows for Left/Full
// and, after those, unmatched right rows for Right/Full. Missing sides and
// null cells are emitted as empty strings.
StageOutput join_tables(const char* const* left_cells, int left_rows, int left_cols, const std::vector& left_headers, const std::vector& left_types, const TableInput& right, const Join& jn) {
    StageOutput out;

    // Resolve key column indices in left and right (-1 when a name is not found).
    std::vector lk_idx, rk_idx;
    for (const auto& p : jn.on) {
        lk_idx.push_back(find_col_idx(left_headers, p.first));
        rk_idx.push_back(find_col_idx(right.headers, p.second));
    }

    // Resolve which right-hand fields to include. Names that are not right
    // headers are silently dropped.
    std::vector right_fields;
    if (jn.fields.empty()) {
        for (int i = 0; i < right.cols; ++i) right_fields.push_back(i);
    } else {
        for (const auto& f : jn.fields) {
            int i = find_col_idx(right.headers, f);
            if (i >= 0) right_fields.push_back(i);
        }
    }

    // Build output headers + types: left + alias.right_field.
    out.cols = left_cols + (int)right_fields.size();
    out.headers.reserve(out.cols);
    out.types.reserve(out.cols);
    for (int c = 0; c < left_cols; ++c) {
        // Short header/type lists are padded with "" / ColumnType::Auto.
        out.headers.push_back(c < (int)left_headers.size() ? left_headers[c] : "");
        out.types.push_back(c < (int)left_types.size() ? left_types[c] : ColumnType::Auto);
    }
    for (int rc : right_fields) {
        // NOTE(review): right.headers[rc] is not bounds-checked the way the left
        // headers are; this relies on right.headers having at least right.cols
        // entries when jn.fields is empty — confirm.
        std::string prefixed = jn.alias.empty() ? right.headers[rc] : (jn.alias + "." + right.headers[rc]);
        out.headers.push_back(std::move(prefixed));
        out.types.push_back(rc < (int)right.types.size() ? right.types[rc] : ColumnType::Auto);
    }

    // Hash right rows by key: key -> list of right row indices, in row order.
    std::unordered_map> right_idx;
    right_idx.reserve(right.rows);
    for (int r = 0; r < right.rows; ++r) {
        right_idx[make_key(right.cells, r, right.cols, rk_idx)].push_back(r);
    }

    // Tracks which right rows were used (needed for right/full).
    std::vector right_matched(right.rows, false);

    // Backing strings for the output cells. The reserve is only a size hint;
    // a join with many matches per key can produce more rows than this.
    out.cell_backing.reserve((size_t)(left_rows + right.rows) * out.cols);

    // Row emitters: each appends one side of an output row to cell_backing.
    auto append_left_row = [&](int lr) {
        for (int c = 0; c < left_cols; ++c) {
            const char* s = left_cells[lr * left_cols + c];
            out.cell_backing.emplace_back(s ? s : "");
        }
    };
    auto append_left_empty = [&]() {
        for (int c = 0; c < left_cols; ++c) out.cell_backing.emplace_back("");
    };
    auto append_right_row = [&](int rr) {
        for (int rc : right_fields) {
            const char* s = right.cells[rr * right.cols + rc];
            out.cell_backing.emplace_back(s ? s : "");
        }
    };
    auto append_right_empty = [&]() {
        for (int rc : right_fields) { (void)rc; out.cell_backing.emplace_back(""); }
    };

    bool include_left = (jn.strategy == JoinStrategy::Left || jn.strategy == JoinStrategy::Inner || jn.strategy == JoinStrategy::Full);
    bool keep_unmatched_left = (jn.strategy == JoinStrategy::Left || jn.strategy == JoinStrategy::Full);
    bool keep_unmatched_right = (jn.strategy == JoinStrategy::Right || jn.strategy == JoinStrategy::Full);

    int row_count = 0;
    // The left scan runs for Left, Inner, Full and Right alike: it emits the
    // matched pairs and records which right rows were hit.
    if (include_left || jn.strategy == JoinStrategy::Right) {
        for (int lr = 0; lr < left_rows; ++lr) {
            std::string k = make_key(left_cells, lr, left_cols, lk_idx);
            auto it = right_idx.find(k);
            if (it == right_idx.end() || it->second.empty()) {
                // No partner on the right: keep the row only for left/full joins.
                if (keep_unmatched_left) {
                    append_left_row(lr);
                    append_right_empty();
                    ++row_count;
                }
                continue;
            }
            // One output row per matching right row.
            for (int rr : it->second) {
                append_left_row(lr);
                append_right_row(rr);
                right_matched[rr] = true;
                ++row_count;
            }
        }
    }
    // Right/full joins: append right rows no left row matched, left side empty.
    if (keep_unmatched_right) {
        for (int rr = 0; rr < right.rows; ++rr) {
            if (right_matched[rr]) continue;
            append_left_empty();
            append_right_row(rr);
            ++row_count;
        }
    }
    out.rows = row_count;

    // Take the cell pointers only after cell_backing is completely filled, so
    // no later growth of the backing store can invalidate them.
    out.cells.reserve(out.cell_backing.size());
    for (auto& s : out.cell_backing) out.cells.push_back(s.c_str());
    return out;
}

// ----------------------------------------------------------------------------
// Phase 10: extended drill — granularity + presets.
// ---------------------------------------------------------------------------- const char* date_granularity_token(DateGranularity g) { switch (g) { case DateGranularity::Year: return "year"; case DateGranularity::Month: return "month"; case DateGranularity::Week: return "week"; case DateGranularity::Day: return "day"; case DateGranularity::Hour: return "hour"; default: return ""; } } DateGranularity date_granularity_from_token(const char* s) { if (!s) return DateGranularity::None; std::string t(s); if (t == "year") return DateGranularity::Year; if (t == "month") return DateGranularity::Month; if (t == "week") return DateGranularity::Week; if (t == "day") return DateGranularity::Day; if (t == "hour") return DateGranularity::Hour; return DateGranularity::None; } DateGranularity parse_breakout_granularity(const std::string& breakout, std::string& col_out) { auto pos = breakout.rfind(':'); if (pos == std::string::npos) { col_out = breakout; return DateGranularity::None; } std::string suffix = breakout.substr(pos + 1); DateGranularity g = date_granularity_from_token(suffix.c_str()); if (g == DateGranularity::None) { col_out = breakout; return DateGranularity::None; } col_out = breakout.substr(0, pos); return g; } std::string compose_breakout(const std::string& col, DateGranularity g) { if (g == DateGranularity::None) return col; return col + ":" + date_granularity_token(g); } int nearest_index_1d(double target, const double* xs, int n) { if (n <= 0 || !xs) return -1; int best = -1; double best_d = 0.0; for (int i = 0; i < n; ++i) { double v = xs[i]; if (std::isnan(v)) continue; double d = std::fabs(v - target); if (best < 0 || d < best_d) { best = i; best_d = d; } } return best; } int nearest_index_2d(double tx, double ty, const double* xs, const double* ys, int n) { if (n <= 0 || !xs || !ys) return -1; int best = -1; double best_d = 0.0; for (int i = 0; i < n; ++i) { double x = xs[i], y = ys[i]; if (std::isnan(x) || std::isnan(y)) continue; double dx = x - tx, dy = y 
- ty; double d = dx*dx + dy*dy; if (best < 0 || d < best_d) { best = i; best_d = d; } } return best; } double pie_angle(double cx, double cy, double mx, double my) { // ImPlot pie: 0 = top, sentido horario. atan2 estandar: 0 = +X (right), CCW. // Conversion: ImPlot angle = atan2(dx, -dy) y normalizar a [0, 2*PI). double dx = mx - cx; double dy = my - cy; double a = std::atan2(dx, -dy); // 0 cuando (dx=0, dy<0) = top const double two_pi = 6.283185307179586; if (a < 0) a += two_pi; return a; } int pie_slice_at_angle(double angle, const double* sums, int n) { if (n <= 0 || !sums) return -1; double total = 0.0; for (int i = 0; i < n; ++i) { if (sums[i] < 0) return -1; total += sums[i]; } if (total <= 0.0) return -1; const double two_pi = 6.283185307179586; if (angle < 0 || angle >= two_pi) return -1; double cum = 0.0; for (int i = 0; i < n; ++i) { cum += (sums[i] / total) * two_pi; if (angle < cum) return i; } return n - 1; // edge case rounding } void heatmap_cell_at(double px, double py, int rows, int cols, int& row_out, int& col_out) { row_out = -1; col_out = -1; if (rows <= 0 || cols <= 0) return; if (px < 0.0 || px >= (double)cols) return; if (py < 0.0 || py >= (double)rows) return; col_out = (int)px; // ImPlot heatmap pinta row 0 arriba; plot Y suele invertirse. Caller // normaliza si necesita. Aqui devolvemos row = floor(py) en coord plot. row_out = (int)py; } void column_min_max(const char* const* cells, int rows, int cols, int col_idx, std::string& min_out, std::string& max_out) { min_out.clear(); max_out.clear(); if (col_idx < 0 || col_idx >= cols) return; bool first = true; for (int r = 0; r < rows; ++r) { const char* v = cells[r * cols + col_idx]; if (!v || !*v) continue; std::string s(v); if (first) { min_out = s; max_out = s; first = false; } else { if (s < min_out) min_out = s; if (s > max_out) max_out = s; } } } namespace { // Parse ISO "YYYY-MM-DD..." -> (y, m, d). True si los 3 primeros campos OK. 
bool parse_ymd(const std::string& s, int& y, int& m, int& d) { if (s.size() < 10) return false; for (int i : {0,1,2,3,5,6,8,9}) { if (s[(size_t)i] < '0' || s[(size_t)i] > '9') return false; } if (s[4] != '-' || s[7] != '-') return false; y = (s[0]-'0')*1000 + (s[1]-'0')*100 + (s[2]-'0')*10 + (s[3]-'0'); m = (s[5]-'0')*10 + (s[6]-'0'); d = (s[8]-'0')*10 + (s[9]-'0'); if (m < 1 || m > 12 || d < 1 || d > 31) return false; return true; } // Dias desde 0001-01-01 (proleptic Gregorian). long ymd_to_days(int y, int m, int d) { if (m <= 2) { y -= 1; m += 12; } long era = (y >= 0 ? y : y - 399) / 400; unsigned yoe = (unsigned)(y - era * 400); unsigned doy = (unsigned)((153 * (m - 3) + 2) / 5 + d - 1); unsigned doe = yoe * 365 + yoe/4 - yoe/100 + doy; return era * 146097 + (long)doe; } void days_to_ymd(long days, int& y, int& m, int& d) { long era = (days >= 0 ? days : days - 146096) / 146097; unsigned doe = (unsigned)(days - era * 146097); unsigned yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365; int yr = (int)yoe + (int)era * 400; unsigned doy = doe - (365*yoe + yoe/4 - yoe/100); unsigned mp = (5*doy + 2)/153; unsigned day = doy - (153*mp + 2)/5 + 1; unsigned mon = mp < 10 ? 
mp + 3 : mp - 9; if (mon <= 2) yr += 1; y = yr; m = (int)mon; d = (int)day; } } // anon std::string truncate_date(const std::string& date, DateGranularity g) { if (g == DateGranularity::None) return date; int y, m, d; if (!parse_ymd(date, y, m, d)) return date; char buf[32]; switch (g) { case DateGranularity::Year: std::snprintf(buf, sizeof(buf), "%04d", y); return buf; case DateGranularity::Month: std::snprintf(buf, sizeof(buf), "%04d-%02d", y, m); return buf; case DateGranularity::Day: std::snprintf(buf, sizeof(buf), "%04d-%02d-%02d", y, m, d); return buf; case DateGranularity::Hour: { int hh = 0; if (date.size() >= 13 && date[10] == 'T' && date[11] >= '0' && date[11] <= '9' && date[12] >= '0' && date[12] <= '9') { hh = (date[11]-'0')*10 + (date[12]-'0'); if (hh < 0 || hh > 23) hh = 0; } std::snprintf(buf, sizeof(buf), "%04d-%02d-%02dT%02d", y, m, d, hh); return buf; } case DateGranularity::Week: { // Hinnant ymd_to_days: day 0 == 0000-03-01 (Wednesday). // days%7: 0=Wed, 1=Thu, 2=Fri, 3=Sat, 4=Sun, 5=Mon, 6=Tue. // Monday offset: (mod - 5 + 7) % 7. 
long days = ymd_to_days(y, m, d); int mod = (int)(((days % 7) + 7) % 7); int rem = ((mod - 5) % 7 + 7) % 7; long monday = days - rem; int yy, mm, dd; days_to_ymd(monday, yy, mm, dd); std::snprintf(buf, sizeof(buf), "%04d-%02d-%02d", yy, mm, dd); return buf; } default: return date; } } DateGranularity auto_date_granularity(const std::string& min_ymd, const std::string& max_ymd) { int y1,m1,d1, y2,m2,d2; if (!parse_ymd(min_ymd, y1,m1,d1)) return DateGranularity::Day; if (!parse_ymd(max_ymd, y2,m2,d2)) return DateGranularity::Day; long span = ymd_to_days(y2,m2,d2) - ymd_to_days(y1,m1,d1); if (span < 0) span = -span; if (span > 730) return DateGranularity::Year; // >2 anios if (span > 60) return DateGranularity::Month; if (span > 14) return DateGranularity::Week; return DateGranularity::Day; } const char* filter_preset_label(FilterPreset p) { switch (p) { case FilterPreset::Last7d: return "Last 7 days"; case FilterPreset::Last30d: return "Last 30 days"; case FilterPreset::Last90d: return "Last 90 days"; case FilterPreset::ExcludeNulls: return "Exclude nulls"; case FilterPreset::NonZero: return "Non-zero only"; } return "?"; } std::vector build_preset_filters(FilterPreset preset, int col, const std::string& today_ymd) { std::vector out; auto last_n = [&](int n) { int y, m, d; if (!parse_ymd(today_ymd, y, m, d)) return; long days = ymd_to_days(y, m, d) - n; int yy, mm, dd; days_to_ymd(days, yy, mm, dd); char buf[16]; std::snprintf(buf, sizeof(buf), "%04d-%02d-%02d", yy, mm, dd); Filter f; f.col = col; f.op = Op::Gte; f.value = buf; out.push_back(f); }; switch (preset) { case FilterPreset::Last7d: last_n(7); break; case FilterPreset::Last30d: last_n(30); break; case FilterPreset::Last90d: last_n(90); break; case FilterPreset::ExcludeNulls: { Filter f; f.col = col; f.op = Op::Neq; f.value = ""; out.push_back(f); break; } case FilterPreset::NonZero: { Filter f1; f1.col = col; f1.op = Op::Neq; f1.value = ""; Filter f2; f2.col = col; f2.op = Op::Neq; f2.value = "0"; 
out.push_back(f1); out.push_back(f2); break; } } return out; } } // namespace data_table