Files
primitives_gallery/playground/tables/data_table_logic.cpp
T
egutierrez d782d463cb asegurate de que subimos todo
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 03:10:00 +02:00

1106 lines
40 KiB
C++

#include "data_table_logic.h"
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <unordered_map>
#include <unordered_set>
namespace data_table {
/// Short display label for a filter operator; "?" for any value not listed.
const char* op_label(Op o) {
    if (o == Op::Eq)          return "=";
    if (o == Op::Neq)         return "!=";
    if (o == Op::Gt)          return ">";
    if (o == Op::Gte)         return ">=";
    if (o == Op::Lt)          return "<";
    if (o == Op::Lte)         return "<=";
    if (o == Op::Contains)    return "contains";
    if (o == Op::NotContains) return "!contains";
    if (o == Op::StartsWith)  return "starts";
    if (o == Op::EndsWith)    return "ends";
    return "?";
}
/// True for the operators that compare() always evaluates lexically
/// (substring / prefix / suffix matching).
bool op_is_string_only(Op o) {
    switch (o) {
        case Op::Contains:
        case Op::NotContains:
        case Op::StartsWith:
        case Op::EndsWith:
            return true;
        default:
            return false;
    }
}
/// Lower-case token for a column type; "?" for any value not listed.
const char* column_type_name(ColumnType t) {
    if (t == ColumnType::Auto)   return "auto";
    if (t == ColumnType::String) return "string";
    if (t == ColumnType::Int)    return "int";
    if (t == ColumnType::Float)  return "float";
    if (t == ColumnType::Bool)   return "bool";
    if (t == ColumnType::Date)   return "date";
    if (t == ColumnType::Json)   return "json";
    return "?";
}
// Tabler icons (UTF-8). Kept as string literals so that icons_tabler.h does
// not have to be included here.
/// Icon glyph (UTF-8 encoded) for a column type; "?" for any value not listed.
const char* column_type_icon(ColumnType t) {
    if (t == ColumnType::Auto)   return "\xef\xa4\x9d"; // TI_HELP_CIRCLE
    if (t == ColumnType::String) return "\xef\x95\xa7"; // TI_ABC
    if (t == ColumnType::Int)    return "\xef\x95\x94"; // TI_123
    if (t == ColumnType::Float)  return "\xef\xa8\xa6"; // TI_DECIMAL
    if (t == ColumnType::Bool)   return "\xee\xae\xa6"; // TI_CHECKBOX
    if (t == ColumnType::Date)   return "\xee\xa9\x93"; // TI_CALENDAR
    if (t == ColumnType::Json)   return "\xee\xaf\x8c"; // TI_BRACES
    return "?";
}
/// Filter operators offered for a column of type `t`.
///  - Int / Float / Date: equality plus the four ordering operators.
///  - Bool: equality only.
///  - String: equality plus contains / starts / ends.
///  - Json, Auto and any other value: equality plus contains / not-contains.
std::vector<Op> ops_for_type(ColumnType t) {
    const bool ordered = t == ColumnType::Int ||
                         t == ColumnType::Float ||
                         t == ColumnType::Date;
    if (ordered) {
        return {Op::Eq, Op::Neq, Op::Gt, Op::Gte, Op::Lt, Op::Lte};
    }
    if (t == ColumnType::Bool) {
        return {Op::Eq, Op::Neq};
    }
    if (t == ColumnType::String) {
        return {Op::Eq, Op::Neq, Op::Contains, Op::NotContains, Op::StartsWith, Op::EndsWith};
    }
    return {Op::Eq, Op::Neq, Op::Contains, Op::NotContains};
}
namespace {
// True when `s` is exactly "true" or "false" (case-sensitive). `s` must be non-null.
bool is_bool_text(const char* s) {
    static const char* const kLiterals[] = {"true", "false"};
    for (const char* literal : kLiterals) {
        if (std::strcmp(s, literal) == 0) return true;
    }
    return false;
}
// True when `s` starts with a YYYY-MM-DD shaped prefix (digits and dashes only;
// month/day ranges are not validated, anything may follow). `s` must be non-null.
bool is_date_iso(const char* s) {
    static const char kShape[] = "dddd-dd-dd"; // 'd' = decimal digit, '-' = literal dash
    for (int i = 0; kShape[i] != '\0'; ++i) {
        const char ch = s[i];
        if (ch == '\0') return false; // shorter than 10 characters
        if (kShape[i] == 'd') {
            if (ch < '0' || ch > '9') return false;
        } else if (ch != '-') {
            return false;
        }
    }
    return true;
}
// True when the first character after leading spaces/tabs is '{' or '['.
// Only the opening character is inspected; the text is not parsed as JSON.
bool is_json_text(const char* s) {
    const char* p = s;
    for (; *p == ' ' || *p == '\t'; ++p) {}
    switch (*p) {
        case '{':
        case '[':
            return true;
        default:
            return false;
    }
}
// True when `s` is an optional single '+'/'-' sign followed by one or more
// decimal digits and nothing else. `s` must be non-null.
bool is_integer_text(const char* s) {
    const char* p = s;
    if (*p == '+' || *p == '-') ++p;
    if (*p == '\0') return false; // empty string or a lone sign
    while (*p >= '0' && *p <= '9') ++p;
    return *p == '\0';
}
} // anon
/// Infers the type of column `col` from up to `sample_n` non-empty cells.
///
/// `cells` is a row-major rows x cols grid of C strings; null and empty cells
/// are skipped and do not count towards the sample. Each sampled cell is
/// classified by the first matching test, in this order: bool text, ISO date
/// prefix, JSON opener, number (integer vs. float). A type is returned only
/// when every sampled cell fell into it; integer and float cells combine into
/// Float (or Int when no float was seen). Everything else, including an
/// out-of-range `col` or an empty sample, yields String.
ColumnType auto_detect_type(const char* const* cells, int rows, int cols,
                            int col, int sample_n)
{
    if (col < 0 || col >= cols) return ColumnType::String;

    enum Bucket { kBool, kDate, kJson, kInt, kFloat, kBucketCount };
    int tally[kBucketCount] = {0, 0, 0, 0, 0};
    int sampled = 0;

    for (int r = 0; r < rows && sampled < sample_n; ++r) {
        const char* text = cells[r * cols + col];
        if (text == nullptr || *text == '\0') continue;
        ++sampled;

        double parsed;
        if (is_bool_text(text)) {
            ++tally[kBool];
        } else if (is_date_iso(text)) {
            ++tally[kDate];
        } else if (is_json_text(text)) {
            ++tally[kJson];
        } else if (parse_number(text, parsed)) {
            ++tally[is_integer_text(text) ? kInt : kFloat];
        }
        // Plain text lands in no bucket, which forces the String fallback below.
    }

    if (sampled == 0)             return ColumnType::String;
    if (tally[kBool] == sampled)  return ColumnType::Bool;
    if (tally[kDate] == sampled)  return ColumnType::Date;
    if (tally[kJson] == sampled)  return ColumnType::Json;
    if (tally[kInt] + tally[kFloat] == sampled) {
        return tally[kFloat] > 0 ? ColumnType::Float : ColumnType::Int;
    }
    return ColumnType::String;
}
/// Resolves a declared column type: any explicit type is returned unchanged,
/// while ColumnType::Auto is replaced by the result of auto_detect_type() on
/// the given column (using that function's default sample size).
ColumnType effective_type(ColumnType declared, const char* const* cells,
                          int rows, int cols, int col)
{
    return (declared == ColumnType::Auto)
        ? auto_detect_type(cells, rows, cols, col)
        : declared;
}
/// Parses `s` as a finite floating-point number.
///
/// Accepts whatever std::strtod accepts (leading whitespace, sign, decimal or
/// exponent notation, hex floats), optionally followed by trailing spaces or
/// tabs. Returns false, leaving `out` untouched, when `s` is null/empty, has
/// no numeric prefix, has trailing garbage, or parses to a non-finite value
/// ("nan", "inf", "infinity", or an overflowing literal such as "1e999").
/// Non-finite results are rejected because NaN breaks equality filters
/// (NaN != NaN), sort comparators and min/max/percentile statistics; such
/// cells are therefore treated as plain text by the callers.
///
/// NOTE: std::strtod honours the current C locale's decimal separator.
bool parse_number(const char* s, double& out) {
    if (s == nullptr || *s == '\0') return false;
    char* end = nullptr;
    const double value = std::strtod(s, &end);
    if (end == s) return false;                 // no characters consumed
    while (*end == ' ' || *end == '\t') ++end;  // tolerate trailing blanks
    if (*end != '\0') return false;             // trailing garbage
    if (!std::isfinite(value)) return false;    // nan / inf / overflow
    out = value;
    return true;
}
/// Evaluates `a <op> b` for two cell texts (null is treated as "").
///
/// Contains / NotContains / StartsWith / EndsWith are always lexical. The
/// remaining operators compare numerically when both sides parse via
/// parse_number(), and fall back to std::strcmp ordering otherwise. Any
/// operator value not handled returns false.
bool compare(const char* a, const char* b, Op op) {
    const char* lhs = a ? a : "";
    const char* rhs = b ? b : "";

    // String-only operators: never attempt a numeric interpretation.
    if (op == Op::Contains)    return std::strstr(lhs, rhs) != nullptr;
    if (op == Op::NotContains) return std::strstr(lhs, rhs) == nullptr;
    if (op == Op::StartsWith)  return std::strncmp(lhs, rhs, std::strlen(rhs)) == 0;
    if (op == Op::EndsWith) {
        const size_t lhs_len = std::strlen(lhs);
        const size_t rhs_len = std::strlen(rhs);
        return rhs_len <= lhs_len && std::strcmp(lhs + lhs_len - rhs_len, rhs) == 0;
    }

    // Relational operators: numeric comparison when both sides are numbers.
    double lhs_num, rhs_num;
    if (parse_number(lhs, lhs_num) && parse_number(rhs, rhs_num)) {
        switch (op) {
            case Op::Eq:  return lhs_num == rhs_num;
            case Op::Neq: return lhs_num != rhs_num;
            case Op::Gt:  return lhs_num >  rhs_num;
            case Op::Gte: return lhs_num >= rhs_num;
            case Op::Lt:  return lhs_num <  rhs_num;
            case Op::Lte: return lhs_num <= rhs_num;
            default: break;
        }
    }

    // Lexical fallback.
    const int order = std::strcmp(lhs, rhs);
    switch (op) {
        case Op::Eq:  return order == 0;
        case Op::Neq: return order != 0;
        case Op::Gt:  return order >  0;
        case Op::Gte: return order >= 0;
        case Op::Lt:  return order <  0;
        case Op::Lte: return order <= 0;
        default: break;
    }
    return false;
}
// State helpers for stage access.
/// Guarantees that at least one stage exists and that `active_stage` is a
/// valid index into `stages`.
void State::ensure_stage0() {
    if (stages.empty()) {
        stages.push_back(Stage{});
    }
    const int last = (int)stages.size() - 1;
    if (active_stage < 0) {
        active_stage = 0;
    } else if (active_stage > last) {
        active_stage = last;
    }
}
/// Mutable access to stage 0, creating it first when the state has no stages.
Stage& State::raw() {
    ensure_stage0();
    return stages.front();
}
/// Read-only access to stage 0. When the state has no stages, a reference to
/// a default-constructed per-thread placeholder is returned instead.
const Stage& State::raw() const {
    if (!stages.empty()) {
        return stages.front();
    }
    static thread_local Stage empty;
    return empty;
}
/// Mutable access to the active stage. ensure_stage0() first creates stage 0
/// if needed and clamps `active_stage` into range, so the index is valid.
Stage& State::active() {
    ensure_stage0();
    const int idx = active_stage;
    return stages[idx];
}
/// Read-only access to the active stage without mutating the state.
/// Falls back to stage 0 when `active_stage` is out of range, and to a
/// default-constructed per-thread placeholder when there are no stages.
const Stage& State::active_const() const {
    if (stages.empty()) {
        static thread_local Stage empty;
        return empty;
    }
    const bool in_range = active_stage >= 0 && active_stage < (int)stages.size();
    return stages[in_range ? active_stage : 0];
}
// Compatibility path: applies the filters and the FIRST sort clause of stage 0
// (Raw). If the state has no stages, every row is returned unfiltered. Used by
// tests and by the current render path (which only operates on Raw when there
// is no grouping).
//
// `cells` is a row-major rows x cols grid of C strings (null cells are treated
// as ""). Filters whose column index is out of range are ignored. Returns the
// indices of the rows that pass every filter, in display order.
//
// Sorting: headers are not available here, so the sort column must be given
// positionally as "@N" (N = 0-based column index). Any other name, a malformed
// index, or an out-of-range index leaves the rows in their original order.
// Cells are ordered as: empty cells, then numbers (by value), then other text
// (by strcmp). This keeps the comparator a strict weak ordering, which
// std::sort/std::stable_sort require; mixing numeric and lexical comparison
// pairwise does not. Rows that tie keep their original relative order.
std::vector<int> compute_visible_rows(const char* const* cells,
                                      int rows, int cols,
                                      const State& st)
{
    std::vector<int> out;
    if (rows <= 0) return out; // also keeps a negative count away from reserve()
    out.reserve(rows);
    const Stage& s = st.raw();
    for (int r = 0; r < rows; ++r) {
        bool keep = true;
        for (const auto& f : s.filters) {
            if (f.col < 0 || f.col >= cols) continue;
            const char* cell = cells[r * cols + f.col];
            if (!compare(cell, f.value.c_str(), f.op)) { keep = false; break; }
        }
        if (keep) out.push_back(r);
    }
    if (s.sorts.empty()) return out;

    // Resolve the positional "@N" column reference strictly: the whole suffix
    // must be a valid in-range index, otherwise no sort is applied.
    const SortClause& sc0 = s.sorts.front();
    int sc = -1;
    if (sc0.col.size() > 1 && sc0.col[0] == '@') {
        const char* digits = sc0.col.c_str() + 1;
        char* end = nullptr;
        const long parsed = std::strtol(digits, &end, 10);
        if (end != digits && *end == '\0' && parsed >= 0 && parsed < cols) {
            sc = (int)parsed;
        }
    }
    if (sc < 0) return out;

    // Sort key of one row: rank 0 = empty, 1 = number, 2 = other text.
    struct Key { int rank; double num; const char* text; };
    auto key_of = [&](int row) {
        Key k{0, 0.0, cells[row * cols + sc]};
        if (!k.text) k.text = "";
        if (*k.text == '\0') return k;
        k.rank = (parse_number(k.text, k.num) && !std::isnan(k.num)) ? 1 : 2;
        return k;
    };

    const bool desc = sc0.desc;
    std::stable_sort(out.begin(), out.end(), [&](int a, int b) {
        const Key ka = key_of(a);
        const Key kb = key_of(b);
        int cmp;
        if (ka.rank != kb.rank) cmp = (ka.rank < kb.rank) ? -1 : 1;
        else if (ka.rank == 1)  cmp = (ka.num < kb.num) ? -1 : (ka.num > kb.num ? 1 : 0);
        else                    cmp = std::strcmp(ka.text, kb.text);
        return desc ? (cmp > 0) : (cmp < 0);
    });
    return out;
}
/// Computes summary statistics for column `col` of a row-major rows x cols grid.
///
/// @param unique_cap  Maximum number of distinct values tracked; 0 means
///                    unlimited. Once the cap is reached, unseen values only
///                    set `unique_capped`.
/// @param indices     Optional subset of row indices (length `n_indices`).
///                    Entries outside [0, rows) are skipped.
///
/// NOTE(review): a subset is only honoured when `n_indices > 0`; passing a
/// non-null `indices` with `n_indices == 0` falls back to ALL rows. Confirm
/// this is what callers with an empty filtered set expect.
///
/// Only finite numbers count as numeric; "nan"/"inf" style cells are treated as
/// text, so they cannot poison min/max/sum, the percentiles or the histogram.
/// Percentiles and the histogram are produced only when every non-empty cell
/// is numeric. An out-of-range `col` returns a default-constructed ColStats.
ColStats compute_column_stats(const char* const* cells, int rows, int cols,
                              int col, int unique_cap,
                              const int* indices, int n_indices)
{
    ColStats s;
    if (col < 0 || col >= cols) return s;
    const bool use_idx = (indices != nullptr && n_indices > 0);
    // Clamp so that a negative row count never reaches reserve().
    const int n = std::max(0, use_idx ? n_indices : rows);
    s.total = n;
    std::unordered_map<std::string, int> counts;
    if (unique_cap > 0) counts.reserve(std::min(unique_cap, n));
    bool all_numeric = true;
    std::vector<double> nums;
    nums.reserve(n);
    for (int i = 0; i < n; ++i) {
        const int r = use_idx ? indices[i] : i;
        if (r < 0 || r >= rows) continue;
        const char* c = cells[r * cols + col];
        if (!c || !*c) { s.empty_count++; continue; }
        double v;
        if (parse_number(c, v) && std::isfinite(v)) {
            if (s.numeric_count == 0) { s.min = v; s.max = v; }
            else {
                if (v < s.min) s.min = v;
                if (v > s.max) s.max = v;
            }
            s.sum += v;
            s.numeric_count++;
            nums.push_back(v);
        } else {
            all_numeric = false;
        }
        if (unique_cap == 0 || (int)counts.size() < unique_cap) {
            counts[c]++;
        } else {
            // Cap reached: keep counting known values, flag the rest.
            auto it = counts.find(c);
            if (it != counts.end()) it->second++;
            else s.unique_capped = true;
        }
    }
    s.unique_count = (int)counts.size();
    s.numeric = all_numeric && s.numeric_count > 0;
    if (s.numeric_count > 0) s.mean = s.sum / s.numeric_count;

    // Top 8 categories by descending count; ties are broken by value so the
    // result does not depend on hash-map iteration order.
    if (!counts.empty()) {
        std::vector<std::pair<std::string,int>> v(counts.begin(), counts.end());
        const int topN = std::min<int>(8, (int)v.size());
        std::partial_sort(v.begin(), v.begin() + topN, v.end(),
            [](const auto& a, const auto& b) {
                if (a.second != b.second) return a.second > b.second;
                return a.first < b.first;
            });
        v.resize(topN);
        s.top_categories = std::move(v);
    }

    if (s.numeric && !nums.empty()) {
        std::sort(nums.begin(), nums.end());
        // Linear interpolation between the two closest ranks.
        auto pct = [&](double p) {
            double idx = p * (nums.size() - 1);
            size_t lo = (size_t)idx;
            size_t hi = std::min(lo + 1, nums.size() - 1);
            double t = idx - lo;
            return nums[lo] * (1.0 - t) + nums[hi] * t;
        };
        s.p25 = pct(0.25);
        s.p50 = pct(0.50);
        s.p75 = pct(0.75);
        s.hist.assign(HIST_BINS, 0.0f);
        const double range = s.max - s.min;
        if (!(range > 0)) {
            // All values identical: put everything in the middle bin.
            s.hist[HIST_BINS / 2] = (float)nums.size();
        } else {
            for (double v : nums) {
                // Clamp in floating point before converting, so an overflowed
                // range can never produce an out-of-range integer cast.
                const double pos = (v - s.min) / range * HIST_BINS;
                int b = 0;
                if (pos >= HIST_BINS) b = HIST_BINS - 1;
                else if (pos > 0.0)   b = (int)pos;
                s.hist[b] += 1.0f;
            }
        }
    }
    return s;
}
/// Moves column id `src` to the slot currently occupied by column id `dst`
/// inside `st.col_order`. No-op when the ids are equal or either is absent.
///
/// The destination position is captured BEFORE `src` is removed and reused for
/// the insertion, which covers both drag directions:
///  - dragging right (src before dst): the erase shifts dst one slot left, so
///    inserting at the captured position puts src immediately after dst;
///  - dragging left (src after dst): dst does not move, so src lands right
///    before it.
void reorder_column(State& st, int src, int dst) {
    if (src == dst) return;

    auto& order = st.col_order;
    const auto src_it = std::find(order.begin(), order.end(), src);
    const auto dst_it = std::find(order.begin(), order.end(), dst);
    if (src_it == order.end() || dst_it == order.end()) return;

    const int from = (int)(src_it - order.begin());
    int to = (int)(dst_it - order.begin());
    const int moved = order[from];

    order.erase(order.begin() + from);
    if (to > (int)order.size()) {
        to = (int)order.size();
    }
    order.insert(order.begin() + to, moved);
}
/// Escapes one CSV field. Text containing a comma, double quote, CR or LF is
/// wrapped in double quotes with embedded quotes doubled; anything else is
/// returned unchanged. A null pointer yields an empty string.
std::string csv_escape(const char* s) {
    if (s == nullptr) return "";
    if (std::strpbrk(s, ",\"\n\r") == nullptr) {
        return std::string(s); // nothing that requires quoting
    }

    std::string quoted;
    quoted.reserve(std::strlen(s) + 4);
    quoted.push_back('"');
    for (const char* p = s; *p != '\0'; ++p) {
        if (*p == '"') quoted.push_back('"'); // double every embedded quote
        quoted.push_back(*p);
    }
    quoted.push_back('"');
    return quoted;
}
namespace {
// Copies `s`, replacing every tab, CR and LF with a single space so the value
// cannot break the TSV row/column structure. Null yields an empty string.
std::string tsv_sanitize(const char* s) {
    std::string out;
    if (!s) return out;
    out.reserve(std::strlen(s));
    for (const char* p = s; *p; ++p) {
        const char ch = *p;
        out += (ch == '\t' || ch == '\n' || ch == '\r') ? ' ' : ch;
    }
    return out;
}
} // anon
/// Builds a TSV block (header line + one line per row) for a rectangular
/// selection of the displayed table.
///
/// @param cells         Row-major rows x cols grid of C strings (null = empty).
/// @param headers       One header per source column; may be null, in which
///                      case header cells are emitted empty.
/// @param col_order     Display order of source column indices.
/// @param col_visible   Per source column visibility; columns beyond its size
///                      count as visible.
/// @param visible_rows  Display order of source row indices.
/// @param view_row_lo / view_row_hi  Selected range in `visible_rows`
///                      positions (either order, clamped to the valid range).
/// @param view_col_lo / view_col_hi  Selected range in `col_order` positions
///                      (either order, clamped to the valid range).
///
/// Hidden columns and column ids outside [0, cols) are skipped. Row ids outside
/// [0, rows) are skipped as well, so a stale `visible_rows` entry can never
/// read past the end of `cells`. Returns "" when `col_order` or `visible_rows`
/// is empty.
std::string build_tsv(const char* const* cells, int rows, int cols,
                      const char* const* headers,
                      const std::vector<int>& col_order,
                      const std::vector<bool>& col_visible,
                      const std::vector<int>& visible_rows,
                      int view_row_lo, int view_row_hi,
                      int view_col_lo, int view_col_hi)
{
    if (col_order.empty() || visible_rows.empty()) return "";
    const int rmin = std::max(0, std::min(view_row_lo, view_row_hi));
    const int rmax = std::min((int)visible_rows.size() - 1, std::max(view_row_lo, view_row_hi));
    const int cmin = std::max(0, std::min(view_col_lo, view_col_hi));
    const int cmax = std::min((int)col_order.size() - 1, std::max(view_col_lo, view_col_hi));

    // Resolve the exported source columns once; header and rows share them.
    std::vector<int> export_cols;
    for (int oc = cmin; oc <= cmax; ++oc) {
        const int c = col_order[oc];
        if (c < 0 || c >= cols) continue;
        if (c < (int)col_visible.size() && !col_visible[c]) continue;
        export_cols.push_back(c);
    }

    std::string out;
    for (size_t i = 0; i < export_cols.size(); ++i) {
        if (i > 0) out += '\t';
        out += tsv_sanitize(headers ? headers[export_cols[i]] : nullptr);
    }
    out += '\n';
    for (int ri = rmin; ri <= rmax; ++ri) {
        const int r = visible_rows[ri];
        if (r < 0 || r >= rows) continue; // stale / invalid row index
        for (size_t i = 0; i < export_cols.size(); ++i) {
            if (i > 0) out += '\t';
            out += tsv_sanitize(cells[r * cols + export_cols[i]]);
        }
        out += '\n';
    }
    return out;
}
/// Builds a CSV document (header line + one line per visible row) covering
/// every visible column in display order.
///
/// @param cells         Row-major rows x cols grid of C strings (null = empty).
/// @param headers       One header per source column; may be null, in which
///                      case header cells are emitted empty.
/// @param col_order     Display order of source column indices.
/// @param col_visible   Per source column visibility; columns beyond its size
///                      count as visible.
/// @param visible_rows  Display order of source row indices.
///
/// Hidden columns and column ids outside [0, cols) are skipped. Row ids outside
/// [0, rows) are skipped as well, so a stale `visible_rows` entry can never
/// read past the end of `cells`. Fields are escaped with csv_escape(); lines
/// end with '\n'. Returns "" when `col_order` is empty.
std::string build_csv(const char* const* cells, int rows, int cols,
                      const char* const* headers,
                      const std::vector<int>& col_order,
                      const std::vector<bool>& col_visible,
                      const std::vector<int>& visible_rows)
{
    if (col_order.empty()) return "";

    // Resolve the exported source columns once; header and rows share them.
    std::vector<int> export_cols;
    export_cols.reserve(col_order.size());
    for (int c : col_order) {
        if (c < 0 || c >= cols) continue;
        if (c < (int)col_visible.size() && !col_visible[c]) continue;
        export_cols.push_back(c);
    }

    std::string out;
    for (size_t i = 0; i < export_cols.size(); ++i) {
        if (i > 0) out += ',';
        out += csv_escape(headers ? headers[export_cols[i]] : nullptr);
    }
    out += '\n';
    for (int r : visible_rows) {
        if (r < 0 || r >= rows) continue; // stale / invalid row index
        for (size_t i = 0; i < export_cols.size(); ++i) {
            if (i > 0) out += ',';
            out += csv_escape(cells[r * cols + export_cols[i]]);
        }
        out += '\n';
    }
    return out;
}
/// Looks backwards from `cursor` for an unclosed '[' on the current line.
///
/// On success returns the index of the '[' and stores the text between it and
/// the cursor in `filter_text`. Returns -1 (with `filter_text` cleared) when
/// `buf` is null, `cursor` is outside (0, len], or a ']' or newline is met
/// before any '['.
int find_open_bracket(const char* buf, int len, int cursor, std::string& filter_text) {
    filter_text.clear();
    if (buf == nullptr || cursor <= 0 || cursor > len) return -1;

    int pos = cursor;
    while (pos-- > 0) {
        const char ch = buf[pos];
        if (ch == '[') {
            filter_text.assign(buf + pos + 1, cursor - pos - 1);
            return pos;
        }
        if (ch == ']' || ch == '\n') break; // bracket already closed, or line start
    }
    return -1;
}
/// Replaces src[start, cursor) with "[name]" and returns the new text.
///
/// `new_cursor` receives the position just past the inserted "]". When the
/// range is invalid (start or cursor outside the string, or cursor < start),
/// `src` is returned unchanged and `new_cursor` is set to `cursor`.
std::string insert_column_ref(const std::string& src, int start, int cursor,
                              const std::string& name, int& new_cursor)
{
    const int len = (int)src.size();
    const bool valid = start >= 0 && start <= len && cursor >= start && cursor <= len;
    if (!valid) {
        new_cursor = cursor;
        return src;
    }

    std::string out = src.substr(0, start);
    out += '[';
    out += name;
    out += ']';
    out += src.substr(cursor);

    new_cursor = start + (int)name.size() + 2; // "[" + name + "]"
    return out;
}
// ----------------------------------------------------------------------------
// TQL stage compute
// ----------------------------------------------------------------------------
/// Lower-case token for an aggregation function; "?" for any value not listed.
const char* agg_fn_name(AggFn f) {
    struct Entry { AggFn fn; const char* name; };
    static const Entry kNames[] = {
        { AggFn::Count,      "count" },
        { AggFn::Sum,        "sum" },
        { AggFn::Avg,        "avg" },
        { AggFn::Min,        "min" },
        { AggFn::Max,        "max" },
        { AggFn::Distinct,   "distinct" },
        { AggFn::Stddev,     "stddev" },
        { AggFn::Median,     "median" },
        { AggFn::P25,        "p25" },
        { AggFn::P75,        "p75" },
        { AggFn::P90,        "p90" },
        { AggFn::P99,        "p99" },
        { AggFn::Percentile, "percentile" },
    };
    for (const Entry& entry : kNames) {
        if (entry.fn == f) return entry.name;
    }
    return "?";
}
/// Output column name for an aggregation.
///
/// An explicit alias wins. Otherwise: Count -> "count"; Percentile ->
/// "p<NN>_<col>" where NN is `arg` (a fraction) scaled to a rounded percent;
/// every other function -> "<fn>_<col>".
///
/// The name is assembled with std::string so that long column names are never
/// truncated (a fixed-size buffer could make two distinct columns share the
/// same alias).
std::string aggregation_alias(const Aggregation& a) {
    if (!a.alias.empty()) return a.alias;
    if (a.fn == AggFn::Count) return "count";

    std::string out;
    if (a.fn == AggFn::Percentile) {
        const int pct = (int)(a.arg * 100.0 + 0.5);
        out = "p" + std::to_string(pct);
    } else {
        out = agg_fn_name(a.fn);
    }
    out += '_';
    out += a.col;
    return out;
}
/// Column type of an aggregation's output.
///  - Count / Distinct produce Int.
///  - Min / Max keep the type of the source column, looked up by header name;
///    String when the column is not found or has no type entry.
///  - Everything else produces Float.
ColumnType aggregation_type(const Aggregation& a,
                            const std::vector<std::string>& in_headers,
                            const std::vector<ColumnType>& in_types)
{
    switch (a.fn) {
        case AggFn::Count:
        case AggFn::Distinct:
            return ColumnType::Int;
        case AggFn::Min:
        case AggFn::Max: {
            const size_t n = in_headers.size();
            for (size_t i = 0; i < n; ++i) {
                if (in_headers[i] == a.col && i < in_types.size()) return in_types[i];
            }
            return ColumnType::String;
        }
        default:
            return ColumnType::Float;
    }
}
/// Builds an equality filter (`column == value`) for the given column index.
Filter make_drill_filter(int col_idx, const std::string& value) {
    Filter drill;
    drill.value = value;
    drill.op = Op::Eq;
    drill.col = col_idx;
    return drill;
}
std::vector<int> apply_filters(const char* const* cells, int rows, int cols,
const std::vector<Filter>& filters)
{
std::vector<int> out;
out.reserve(rows);
for (int r = 0; r < rows; ++r) {
bool keep = true;
for (const auto& f : filters) {
if (f.col < 0 || f.col >= cols) continue;
const char* cell = cells[r * cols + f.col];
if (!compare(cell, f.value.c_str(), f.op)) { keep = false; break; }
}
if (keep) out.push_back(r);
}
return out;
}
namespace {
// Index of the first header equal to `name`, or -1 when there is none.
int find_col(const std::vector<std::string>& headers, const std::string& name) {
    const auto hit = std::find(headers.begin(), headers.end(), name);
    return hit == headers.end() ? -1 : (int)(hit - headers.begin());
}
// Three-way comparison of two cells for sorting (null is treated as "").
// Returns <0, 0 or >0.
//
// Cells are ranked as: empty (0) < number (1) < other text (2). Numbers are
// compared by value and text by strcmp. Ranking first keeps the result a
// consistent ordering; deciding "numeric vs. lexical" separately for each
// pair does not (e.g. "9" < "10" numerically, "10" < "1a" and "1a" < "9"
// lexically), and an inconsistent comparator is undefined behaviour in
// std::sort. NaN is never treated as a number for the same reason.
int cmp_cells(const char* a, const char* b) {
    if (!a) a = "";
    if (!b) b = "";
    double na = 0.0, nb = 0.0;
    auto rank_of = [](const char* s, double& num) {
        if (*s == '\0') return 0;
        return (parse_number(s, num) && !std::isnan(num)) ? 1 : 2;
    };
    const int ra = rank_of(a, na);
    const int rb = rank_of(b, nb);
    if (ra != rb) return (ra < rb) ? -1 : 1;
    if (ra == 1) return (na < nb) ? -1 : (na > nb ? 1 : 0);
    return std::strcmp(a, b);
}
// Sorts `row_idx` (row indices into the row-major `cells` grid) in place by
// the given sort clauses, applied in priority order.
//
// Sort columns are resolved by header name; clauses naming an unknown column
// are ignored. A stable sort is used so rows that tie on every clause (or all
// rows, when no clause resolves to a column) keep their incoming order instead
// of being permuted arbitrarily.
void apply_sorts(std::vector<int>& row_idx,
                 const char* const* cells, int cols,
                 const std::vector<std::string>& headers,
                 const std::vector<SortClause>& sorts)
{
    if (sorts.empty()) return;
    std::vector<int> sort_cols(sorts.size());
    for (size_t i = 0; i < sorts.size(); ++i) sort_cols[i] = find_col(headers, sorts[i].col);
    std::stable_sort(row_idx.begin(), row_idx.end(), [&](int a, int b) {
        for (size_t i = 0; i < sorts.size(); ++i) {
            const int sc = sort_cols[i];
            if (sc < 0) continue;
            const int c = cmp_cells(cells[a * cols + sc], cells[b * cols + sc]);
            if (c != 0) return sorts[i].desc ? (c > 0) : (c < 0);
        }
        return false; // equal on every clause
    });
}
// Percentile of `v` at fraction `p` using linear interpolation between the two
// closest ranks. Sorts `v` in place. Returns 0.0 for an empty vector.
//
// `p` is clamped to [0, 1] (NaN is treated as 0): an unclamped value would
// index past the end of `v` or convert a negative double to size_t.
double percentile_value(std::vector<double>& v, double p) {
    if (v.empty()) return 0.0;
    if (!(p >= 0.0)) p = 0.0;   // negative or NaN
    else if (p > 1.0) p = 1.0;
    std::sort(v.begin(), v.end());
    const double idx = p * (v.size() - 1);
    const size_t lo = (size_t)idx;
    const size_t hi = std::min(lo + 1, v.size() - 1);
    const double t = idx - lo;
    return v[lo] * (1.0 - t) + v[hi] * t;
}
// Evaluates a numeric aggregation over `vals`.
//
// Returns 0.0 for an empty input and for functions that are not numeric
// aggregations here (e.g. Count / Distinct). Stddev is the population standard
// deviation (divides by N). The percentile-based functions sort `vals` in
// place; `arg` is only used by Percentile, as the fraction to evaluate.
double compute_agg_numeric(AggFn fn, std::vector<double>& vals, double arg) {
    if (vals.empty()) return 0.0;

    const auto total = [&vals]() {
        double acc = 0;
        for (double x : vals) acc += x;
        return acc;
    };

    switch (fn) {
        case AggFn::Sum:
            return total();
        case AggFn::Avg:
            return total() / vals.size();
        case AggFn::Min:
            return *std::min_element(vals.begin(), vals.end());
        case AggFn::Max:
            return *std::max_element(vals.begin(), vals.end());
        case AggFn::Stddev: {
            const double mean = total() / vals.size();
            double squared = 0;
            for (double x : vals) {
                const double delta = x - mean;
                squared += delta * delta;
            }
            return std::sqrt(squared / vals.size());
        }
        case AggFn::Median:     return percentile_value(vals, 0.50);
        case AggFn::P25:        return percentile_value(vals, 0.25);
        case AggFn::P75:        return percentile_value(vals, 0.75);
        case AggFn::P90:        return percentile_value(vals, 0.90);
        case AggFn::P99:        return percentile_value(vals, 0.99);
        case AggFn::Percentile: return percentile_value(vals, arg);
        default:
            return 0.0;
    }
}
// Formats an aggregation result for display: integral values that fit in a
// long long print without a decimal point, everything else as "%.4g"
// (4 significant digits).
//
// The range is checked BEFORE converting to long long, because converting a
// NaN, an infinity or a value with |v| >= 2^63 to an integer is undefined
// behaviour.
std::string format_double(double v) {
    char buf[64];
    const bool fits = v >= -9223372036854775808.0 && v < 9223372036854775808.0;
    if (fits && (double)(long long)v == v) {
        std::snprintf(buf, sizeof(buf), "%lld", (long long)v);
    } else {
        std::snprintf(buf, sizeof(buf), "%.4g", v);
    }
    return buf;
}
} // anon
/// Evaluates one TQL stage over an input table and returns the resulting table.
///
/// `in_cells` is a row-major in_rows x in_cols grid of C strings; `in_headers`
/// and `in_types` describe its columns.
///
/// Steps:
///  1. The stage's filters are applied (apply_filters).
///  2. With no breakouts and no aggregations the stage is a passthrough: same
///     columns, filtered rows sorted by stage.sorts.
///  3. Otherwise the filtered rows are grouped by the breakout column values.
///     The output has one row per group: the breakout columns followed by one
///     column per aggregation (named via aggregation_alias). Groups are then
///     sorted by stage.sorts, resolved against the OUTPUT headers.
///
/// Lifetime: output cells are raw pointers. Passthrough cells and breakout /
/// string min-max cells point into `in_cells`; computed aggregation cells point
/// into `out.cell_backing`. The caller must therefore keep `in_cells` alive for
/// as long as the result is used.
/// NOTE(review): pointers into `out.cell_backing` are presumably only valid
/// while that container is not copied or reallocated — confirm against the
/// StageOutput declaration.
StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
const std::vector<std::string>& in_headers,
const std::vector<ColumnType>& in_types,
const Stage& stage)
{
StageOutput out;
auto visible = apply_filters(in_cells, in_rows, in_cols, stage.filters);
bool grouped = !stage.breakouts.empty() || !stage.aggregations.empty();
if (!grouped) {
// Passthrough: same shape, filtered + sorted.
out.cols = in_cols;
out.headers = in_headers;
out.types = in_types;
// Sort the visible rows.
apply_sorts(visible, in_cells, in_cols, in_headers, stage.sorts);
out.rows = (int)visible.size();
out.cells.reserve((size_t)out.rows * in_cols);
for (int r : visible) {
for (int c = 0; c < in_cols; ++c) out.cells.push_back(in_cells[r * in_cols + c]);
}
return out;
}
// Grouped: group the visible rows by breakout values and compute the aggregations.
// Breakout names that match no input header resolve to -1.
std::vector<int> break_cols(stage.breakouts.size());
for (size_t i = 0; i < stage.breakouts.size(); ++i) {
break_cols[i] = find_col(in_headers, stage.breakouts[i]);
}
// Builds the grouping key of a row: breakout values joined by 0x1F.
// Unresolved breakouts contribute an empty segment.
auto make_key = [&](int r) -> std::string {
std::string k;
for (size_t i = 0; i < break_cols.size(); ++i) {
if (i > 0) k += '\x1f'; // unit-separator delimiter (assumed not to appear in the data)
int bc = break_cols[i];
if (bc < 0) continue;
const char* v = in_cells[r * in_cols + bc];
k += (v ? v : "");
}
return k;
};
// Groups are kept in order of first appearance, ahead of the sort below.
std::unordered_map<std::string, int> key_to_group;
std::vector<std::string> group_keys; // canonical key; not used except for debugging
std::vector<std::vector<int>> group_rows; // row indices into in_cells, per group
std::vector<std::vector<const char*>> group_breakvals; // breakout values, per group
for (int r : visible) {
std::string k = make_key(r);
auto it = key_to_group.find(k);
int gi;
if (it == key_to_group.end()) {
// First row of a new group: register it and capture its breakout values.
gi = (int)group_rows.size();
key_to_group.emplace(k, gi);
group_keys.push_back(k);
group_rows.emplace_back();
std::vector<const char*> bv(break_cols.size(), "");
for (size_t i = 0; i < break_cols.size(); ++i) {
int bc = break_cols[i];
bv[i] = (bc >= 0) ? in_cells[r * in_cols + bc] : "";
}
group_breakvals.push_back(std::move(bv));
} else gi = it->second;
group_rows[gi].push_back(r);
}
// Output headers + types: breakouts followed by aggregation aliases.
int out_cols = (int)stage.breakouts.size() + (int)stage.aggregations.size();
out.cols = out_cols;
out.headers.reserve(out_cols);
out.types.reserve(out_cols);
for (size_t i = 0; i < stage.breakouts.size(); ++i) {
out.headers.push_back(stage.breakouts[i]);
int bc = break_cols[i];
out.types.push_back((bc >= 0 && bc < (int)in_types.size())
? in_types[bc] : ColumnType::String);
}
for (const auto& a : stage.aggregations) {
out.headers.push_back(aggregation_alias(a));
out.types.push_back(aggregation_type(a, in_headers, in_types));
}
// Compute aggregation values per group. The backing store is reserved up
// front (at most one string is stored per group per aggregation, plus slack)
// so that the .c_str() pointers handed out below are not invalidated by growth.
int n_groups = (int)group_rows.size();
out.cell_backing.reserve((size_t)n_groups * stage.aggregations.size() + 16);
auto store_backing = [&](const std::string& s) -> const char* {
out.cell_backing.push_back(s);
return out.cell_backing.back().c_str();
};
// Build the cells per group (rows not sorted yet).
std::vector<const char*> flat;
flat.reserve((size_t)n_groups * out_cols);
for (int gi = 0; gi < n_groups; ++gi) {
// breakout values: direct pointers into in_cells (stable).
for (size_t i = 0; i < stage.breakouts.size(); ++i) {
flat.push_back(group_breakvals[gi][i]);
}
// aggregations
for (const auto& a : stage.aggregations) {
// Count: number of rows in the group; no source column needed.
if (a.fn == AggFn::Count) {
flat.push_back(store_backing(format_double((double)group_rows[gi].size())));
continue;
}
// Distinct: number of different non-empty values; "0" if the column is unknown.
if (a.fn == AggFn::Distinct) {
int ac = find_col(in_headers, a.col);
if (ac < 0) { flat.push_back(store_backing("0")); continue; }
std::unordered_set<std::string> uniq;
for (int r : group_rows[gi]) {
const char* v = in_cells[r * in_cols + ac];
if (v && *v) uniq.insert(v);
}
flat.push_back(store_backing(format_double((double)uniq.size())));
continue;
}
// Every other aggregation needs its source column; unknown column -> empty cell.
int ac = find_col(in_headers, a.col);
if (ac < 0) { flat.push_back(store_backing("")); continue; }
// min/max over strings preserves the type
// (String and Date columns are compared with strcmp; empty cells are skipped).
if ((a.fn == AggFn::Min || a.fn == AggFn::Max) &&
ac < (int)in_types.size() &&
(in_types[ac] == ColumnType::String || in_types[ac] == ColumnType::Date))
{
const char* best = nullptr;
for (int r : group_rows[gi]) {
const char* v = in_cells[r * in_cols + ac];
if (!v || !*v) continue;
if (!best) { best = v; continue; }
int c = std::strcmp(v, best);
if ((a.fn == AggFn::Min && c < 0) || (a.fn == AggFn::Max && c > 0)) best = v;
}
flat.push_back(best ? best : store_backing(""));
continue;
}
// Numeric aggregation: only cells that parse as numbers take part.
std::vector<double> vals;
vals.reserve(group_rows[gi].size());
for (int r : group_rows[gi]) {
const char* v = in_cells[r * in_cols + ac];
if (!v || !*v) continue;
double d;
if (parse_number(v, d)) vals.push_back(d);
}
double agg_val = compute_agg_numeric(a.fn, vals, a.arg);
flat.push_back(store_backing(format_double(agg_val)));
}
}
// Sort the n_groups according to stage.sorts (column-name lookup in out.headers).
std::vector<int> grp_idx(n_groups);
for (int i = 0; i < n_groups; ++i) grp_idx[i] = i;
apply_sorts(grp_idx, flat.data(), out_cols, out.headers, stage.sorts);
out.rows = n_groups;
out.cells.reserve((size_t)n_groups * out_cols);
// Emit the group rows in sorted order.
for (int gi : grp_idx) {
for (int c = 0; c < out_cols; ++c) {
out.cells.push_back(flat[gi * out_cols + c]);
}
}
return out;
}
// ----------------------------------------------------------------------------
// ViewMode helpers
// ----------------------------------------------------------------------------
// One row of static metadata per ViewMode. The view_mode_* accessors below do
// a linear lookup in kViewModes and return the matching field.
struct ViewModeInfo {
ViewMode m; // the view mode this row describes
const char* token; // identifier used by view_mode_token / view_mode_from_token
const char* label; // human-readable name returned by view_mode_label
int min_cols; // returned by view_mode_min_cols; presumably the minimum number of columns the view needs — confirm with the renderer
bool needs_num; // returned by view_mode_needs_numeric
bool needs_cat; // returned by view_mode_needs_category
bool needs_agg; // returned by view_mode_needs_aggregation
};
// Columns: mode, token, label, min_cols, needs_num, needs_cat, needs_agg.
static const ViewModeInfo kViewModes[] = {
{ ViewMode::Table, "table", "Table", 1, false, false, false },
{ ViewMode::Bar, "bar", "Bar (horizontal)", 2, true, true, true },
{ ViewMode::Column, "column", "Column (vertical)", 2, true, true, true },
{ ViewMode::GroupedBar, "grouped_bar", "Grouped bar", 2, true, true, true },
{ ViewMode::StackedBar, "stacked_bar", "Stacked bar", 2, true, true, true },
{ ViewMode::Line, "line", "Line", 1, true, false, false },
{ ViewMode::Area, "area", "Area", 1, true, false, false },
{ ViewMode::Stairs, "stairs", "Stairs", 1, true, false, false },
{ ViewMode::Scatter, "scatter", "Scatter", 2, true, false, false },
{ ViewMode::Bubble, "bubble", "Bubble", 3, true, false, false },
{ ViewMode::Histogram, "histogram", "Histogram", 1, true, false, false },
{ ViewMode::Histogram2D, "hist2d", "Histogram 2D", 2, true, false, false },
{ ViewMode::Heatmap, "heatmap", "Heatmap", 1, true, false, false },
{ ViewMode::BoxPlot, "boxplot", "Box plot", 2, true, true, false },
{ ViewMode::Stem, "stem", "Stem", 1, true, false, false },
{ ViewMode::ErrorBars, "errorbars", "Error bars", 2, true, false, false },
{ ViewMode::Pie, "pie", "Pie", 2, true, true, true },
{ ViewMode::Donut, "donut", "Donut", 2, true, true, true },
{ ViewMode::Funnel, "funnel", "Funnel", 2, true, true, true },
{ ViewMode::Waterfall, "waterfall", "Waterfall", 1, true, false, true },
{ ViewMode::KPI, "kpi", "KPI (single)", 1, true, false, true },
{ ViewMode::KPIGrid, "kpi_grid", "KPI grid", 1, true, false, true },
{ ViewMode::Candlestick, "candlestick", "Candlestick (OHLC)", 4, true, false, false },
{ ViewMode::Radar, "radar", "Radar", 2, true, true, false },
};
// Number of rows in kViewModes.
static const int kViewModesN = (int)(sizeof(kViewModes) / sizeof(kViewModes[0]));
/// Serialization token of a view mode; "table" when the mode is not in kViewModes.
const char* view_mode_token(ViewMode m) {
    for (const ViewModeInfo& info : kViewModes) {
        if (info.m == m) return info.token;
    }
    return "table";
}
/// Human-readable label of a view mode; "Table" when the mode is not in kViewModes.
const char* view_mode_label(ViewMode m) {
    for (const ViewModeInfo& info : kViewModes) {
        if (info.m == m) return info.label;
    }
    return "Table";
}
/// Parses a view-mode token (exact, case-sensitive match). Null or unknown
/// tokens map to ViewMode::Table.
ViewMode view_mode_from_token(const char* s) {
    if (s != nullptr) {
        for (const ViewModeInfo& info : kViewModes) {
            if (std::strcmp(info.token, s) == 0) return info.m;
        }
    }
    return ViewMode::Table;
}
/// `min_cols` entry of a view mode; 1 when the mode is not in kViewModes.
int view_mode_min_cols(ViewMode m) {
    for (const ViewModeInfo& info : kViewModes) {
        if (info.m == m) return info.min_cols;
    }
    return 1;
}
/// `needs_num` flag of a view mode; false when the mode is not in kViewModes.
bool view_mode_needs_numeric(ViewMode m) {
    for (const ViewModeInfo& info : kViewModes) {
        if (info.m == m) return info.needs_num;
    }
    return false;
}
/// `needs_cat` flag of a view mode; false when the mode is not in kViewModes.
bool view_mode_needs_category(ViewMode m) {
    for (const ViewModeInfo& info : kViewModes) {
        if (info.m == m) return info.needs_cat;
    }
    return false;
}
/// `needs_agg` flag of a view mode; false when the mode is not in kViewModes.
bool view_mode_needs_aggregation(ViewMode m) {
    for (const ViewModeInfo& info : kViewModes) {
        if (info.m == m) return info.needs_agg;
    }
    return false;
}
/// Returns a pointer to an array holding every view mode, in kViewModes order.
/// When `n_out` is non-null it receives the number of entries.
///
/// The array is sized from kViewModes, so it cannot overflow when modes are
/// added, and it is filled through the initializer of a function-local static,
/// which the language guarantees runs exactly once even with concurrent first
/// calls (the previous hand-rolled `init` flag was a data race).
const ViewMode* all_view_modes(int* n_out) {
    static ViewMode arr[kViewModesN];
    static const bool filled = [] {
        for (int i = 0; i < kViewModesN; ++i) arr[i] = kViewModes[i].m;
        return true;
    }();
    (void)filled;
    if (n_out) *n_out = kViewModesN;
    return arr;
}
// ----------------------------------------------------------------------------
// Joins
// ----------------------------------------------------------------------------
int resolve_main_idx(const std::vector<TableInput>& tables, const std::string& main_source) {
if (tables.empty()) return -1;
if (main_source.empty()) return 0;
for (size_t i = 0; i < tables.size(); ++i) {
if (tables[i].name == main_source) return (int)i;
}
return 0;
}
/// Serialization token of a join strategy; "left" for any value not listed.
const char* join_strategy_token(JoinStrategy s) {
    if (s == JoinStrategy::Inner) return "inner";
    if (s == JoinStrategy::Right) return "right";
    if (s == JoinStrategy::Full)  return "full";
    return "left";
}
/// Parses a join-strategy token (exact, case-sensitive match). Null, "left"
/// and any unknown token map to JoinStrategy::Left.
JoinStrategy join_strategy_from_token(const char* s) {
    if (s != nullptr) {
        struct Entry { const char* token; JoinStrategy value; };
        static const Entry kNonDefault[] = {
            { "inner", JoinStrategy::Inner },
            { "right", JoinStrategy::Right },
            { "full",  JoinStrategy::Full },
        };
        for (const Entry& entry : kNonDefault) {
            if (std::strcmp(s, entry.token) == 0) return entry.value;
        }
    }
    return JoinStrategy::Left;
}
/// Display label of a join strategy; "left-join" for any value not listed.
const char* join_strategy_label(JoinStrategy s) {
    if (s == JoinStrategy::Inner) return "inner-join";
    if (s == JoinStrategy::Right) return "right-join";
    if (s == JoinStrategy::Full)  return "full-join";
    return "left-join";
}
namespace {
// Index of the first header equal to `name`, or -1 when there is none.
int find_col_idx(const std::vector<std::string>& hdrs, const std::string& name) {
    const auto hit = std::find(hdrs.begin(), hdrs.end(), name);
    if (hit == hdrs.end()) return -1;
    return (int)(hit - hdrs.begin());
}
// Builds the join key of one row: the text of each key column followed by a
// 0x1F separator (null cells contribute empty text). A key column outside
// [0, cols) contributes the marker "\x1f|" instead.
std::string make_key(const char* const* cells, int row, int cols,
                     const std::vector<int>& key_cols) {
    std::string key;
    for (size_t i = 0; i < key_cols.size(); ++i) {
        const int c = key_cols[i];
        const bool valid = c >= 0 && c < cols;
        if (!valid) {
            key.append("\x1f|");
            continue;
        }
        if (const char* text = cells[row * cols + c]) {
            key.append(text);
        }
        key.push_back('\x1f'); // separator
    }
    return key;
}
} // anon
/// Joins a left table (row-major left_rows x left_cols grid) with `right`
/// according to `jn`, using a hash join on the key columns named in `jn.on`
/// (pairs of left header / right header).
///
/// Output columns: all left columns, followed by the right columns listed in
/// `jn.fields` (or every right column when the list is empty). Right column
/// names are prefixed with "<alias>." when `jn.alias` is non-empty. Requested
/// right fields that are unknown or lie beyond `right.cols` are dropped; a
/// right column with no header entry gets an empty name.
///
/// Rows, in order:
///  - for each left row, one output row per matching right row;
///  - Left / Full: left rows without a match are kept, right side empty;
///  - Right / Full: right rows never matched are appended, left side empty.
///
/// Every output cell is copied into `out.cell_backing`; `out.cells` is filled
/// with pointers into those strings once all rows have been produced.
///
/// NOTE(review): keys are compared as text, and a null cell equals an empty
/// one. With an empty `jn.on` every row has the same (empty) key, so the
/// result is a cross product — confirm that is intended.
StageOutput join_tables(const char* const* left_cells, int left_rows, int left_cols,
                        const std::vector<std::string>& left_headers,
                        const std::vector<ColumnType>& left_types,
                        const TableInput& right,
                        const Join& jn)
{
    StageOutput out;
    // A negative row count must never reach vector sizing / reserve.
    const int right_rows = std::max(0, right.rows);

    // Resolve key column indices on both sides (-1 when a name is unknown).
    std::vector<int> lk_idx, rk_idx;
    lk_idx.reserve(jn.on.size());
    rk_idx.reserve(jn.on.size());
    for (const auto& p : jn.on) {
        lk_idx.push_back(find_col_idx(left_headers, p.first));
        rk_idx.push_back(find_col_idx(right.headers, p.second));
    }

    // Resolve which right-hand columns are included in the output.
    std::vector<int> right_fields;
    if (jn.fields.empty()) {
        for (int i = 0; i < right.cols; ++i) right_fields.push_back(i);
    } else {
        for (const auto& f : jn.fields) {
            const int i = find_col_idx(right.headers, f);
            // Headers beyond right.cols have no cell data behind them.
            if (i >= 0 && i < right.cols) right_fields.push_back(i);
        }
    }

    // Output headers + types: left columns, then (aliased) right fields.
    out.cols = left_cols + (int)right_fields.size();
    out.headers.reserve(out.cols);
    out.types.reserve(out.cols);
    for (int c = 0; c < left_cols; ++c) {
        out.headers.push_back(c < (int)left_headers.size() ? left_headers[c] : "");
        out.types.push_back(c < (int)left_types.size() ? left_types[c] : ColumnType::Auto);
    }
    for (int rc : right_fields) {
        // Same guard as on the left side: headers may be shorter than cols.
        const std::string base = rc < (int)right.headers.size() ? right.headers[rc] : std::string();
        out.headers.push_back(jn.alias.empty() ? base : (jn.alias + "." + base));
        out.types.push_back(rc < (int)right.types.size() ? right.types[rc] : ColumnType::Auto);
    }

    // Hash the right rows by key.
    std::unordered_map<std::string, std::vector<int>> right_idx;
    right_idx.reserve(right_rows);
    for (int r = 0; r < right_rows; ++r) {
        right_idx[make_key(right.cells, r, right.cols, rk_idx)].push_back(r);
    }

    // Tracks which right rows were matched (needed for Right / Full).
    std::vector<bool> right_matched(right_rows, false);

    // Backing strings for the cells. The reserve is only a hint: many-to-many
    // matches can produce more rows, which is fine because pointers are taken
    // after all rows have been appended.
    const size_t row_hint = (size_t)std::max(0, left_rows) + (size_t)right_rows;
    out.cell_backing.reserve(row_hint * (size_t)std::max(0, out.cols));

    auto append_left_row = [&](int lr) {
        for (int c = 0; c < left_cols; ++c) {
            const char* s = left_cells[lr * left_cols + c];
            out.cell_backing.emplace_back(s ? s : "");
        }
    };
    auto append_left_empty = [&]() {
        for (int c = 0; c < left_cols; ++c) out.cell_backing.emplace_back("");
    };
    auto append_right_row = [&](int rr) {
        for (int rc : right_fields) {
            const char* s = right.cells[rr * right.cols + rc];
            out.cell_backing.emplace_back(s ? s : "");
        }
    };
    auto append_right_empty = [&]() {
        for (size_t i = 0; i < right_fields.size(); ++i) out.cell_backing.emplace_back("");
    };

    const bool keep_unmatched_left =
        (jn.strategy == JoinStrategy::Left || jn.strategy == JoinStrategy::Full);
    const bool keep_unmatched_right =
        (jn.strategy == JoinStrategy::Right || jn.strategy == JoinStrategy::Full);

    int row_count = 0;
    // Every strategy walks the left rows: matches are always emitted, and the
    // walk is also what marks right rows as matched.
    for (int lr = 0; lr < left_rows; ++lr) {
        const std::string k = make_key(left_cells, lr, left_cols, lk_idx);
        const auto it = right_idx.find(k);
        if (it == right_idx.end() || it->second.empty()) {
            if (keep_unmatched_left) {
                append_left_row(lr);
                append_right_empty();
                ++row_count;
            }
            continue;
        }
        for (int rr : it->second) {
            append_left_row(lr);
            append_right_row(rr);
            right_matched[rr] = true;
            ++row_count;
        }
    }
    if (keep_unmatched_right) {
        for (int rr = 0; rr < right_rows; ++rr) {
            if (right_matched[rr]) continue;
            append_left_empty();
            append_right_row(rr);
            ++row_count;
        }
    }
    out.rows = row_count;

    // Take the pointers only now that the backing store is complete.
    out.cells.reserve(out.cell_backing.size());
    for (auto& s : out.cell_backing) out.cells.push_back(s.c_str());
    return out;
}
} // namespace data_table