Files
primitives_gallery/playground/tables/data_table_logic.cpp
T
egutierrez 100aeaa1fc chore: auto-commit (12 archivos)
- playground/tables/CMakeLists.txt
- playground/tables/data_table.cpp
- playground/tables/data_table_logic.cpp
- playground/tables/data_table_logic.h
- playground/tables/self_test.cpp
- playground/tables/tql.cpp
- playground/tables/viz.cpp
- playground/tables/viz.h
- playground/tables/llm_anthropic.cpp
- playground/tables/llm_anthropic.h
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 00:50:35 +02:00

1496 lines
54 KiB
C++

#include "data_table_logic.h"
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <unordered_map>
#include <unordered_set>
namespace data_table {
/// Short display label for a filter operator.
/// Returns "?" when `o` does not match any known enumerator.
const char* op_label(Op o) {
    const char* label = "?";
    switch (o) {
        case Op::Eq:          label = "=";         break;
        case Op::Neq:         label = "!=";        break;
        case Op::Gt:          label = ">";         break;
        case Op::Gte:         label = ">=";        break;
        case Op::Lt:          label = "<";         break;
        case Op::Lte:         label = "<=";        break;
        case Op::Contains:    label = "contains";  break;
        case Op::NotContains: label = "!contains"; break;
        case Op::StartsWith:  label = "starts";    break;
        case Op::EndsWith:    label = "ends";      break;
    }
    return label;
}
/// True for the operators that are always evaluated as plain text
/// (substring / prefix / suffix matching).
bool op_is_string_only(Op o) {
    switch (o) {
        case Op::Contains:
        case Op::NotContains:
        case Op::StartsWith:
        case Op::EndsWith:
            return true;
        default:
            return false;
    }
}
/// Lower-case name of a column type; "?" for a value outside the enum.
const char* column_type_name(ColumnType t) {
    const char* name = "?";
    switch (t) {
        case ColumnType::Auto:   name = "auto";   break;
        case ColumnType::String: name = "string"; break;
        case ColumnType::Int:    name = "int";    break;
        case ColumnType::Float:  name = "float";  break;
        case ColumnType::Bool:   name = "bool";   break;
        case ColumnType::Date:   name = "date";   break;
        case ColumnType::Json:   name = "json";   break;
    }
    return name;
}
// Tabler icon glyphs (UTF-8 encoded). They are kept as raw string literals so
// this file does not have to include icons_tabler.h.
/// Icon glyph for a column type; "?" for a value outside the enum.
const char* column_type_icon(ColumnType t) {
    const char* glyph = "?";
    switch (t) {
        case ColumnType::Auto:   glyph = "\xef\xa4\x9d"; break; // TI_HELP_CIRCLE
        case ColumnType::String: glyph = "\xef\x95\xa7"; break; // TI_ABC
        case ColumnType::Int:    glyph = "\xef\x95\x94"; break; // TI_123
        case ColumnType::Float:  glyph = "\xef\xa8\xa6"; break; // TI_DECIMAL
        case ColumnType::Bool:   glyph = "\xee\xae\xa6"; break; // TI_CHECKBOX
        case ColumnType::Date:   glyph = "\xee\xa9\x93"; break; // TI_CALENDAR
        case ColumnType::Json:   glyph = "\xee\xaf\x8c"; break; // TI_BRACES
    }
    return glyph;
}
/// Operators offered for a column of type `t`.
/// Int/Float/Date get the six ordering operators, Bool only equality,
/// String the full text set; Json, Auto and anything else get equality plus
/// contains / not-contains.
std::vector<Op> ops_for_type(ColumnType t) {
    const bool ordered = (t == ColumnType::Int) ||
                         (t == ColumnType::Float) ||
                         (t == ColumnType::Date);
    if (ordered) {
        return {Op::Eq, Op::Neq, Op::Gt, Op::Gte, Op::Lt, Op::Lte};
    }
    if (t == ColumnType::Bool) {
        return {Op::Eq, Op::Neq};
    }
    if (t == ColumnType::String) {
        return {Op::Eq, Op::Neq, Op::Contains, Op::NotContains, Op::StartsWith, Op::EndsWith};
    }
    // Json, Auto and any unknown value share the same reduced text set.
    return {Op::Eq, Op::Neq, Op::Contains, Op::NotContains};
}
namespace {

/// True when `s` is exactly "true" or "false" (case-sensitive).
bool is_bool_text(const char* s) {
    static const char* const kLiterals[] = {"true", "false"};
    for (const char* literal : kLiterals) {
        if (std::strcmp(s, literal) == 0) return true;
    }
    return false;
}

/// True when `s` starts with a YYYY-MM-DD pattern; anything may follow it.
bool is_date_iso(const char* s) {
    // Walk the first ten characters. A NUL inside them fails the digit/dash
    // test, so the scan never reads past the end of a shorter string.
    for (int i = 0; i < 10; ++i) {
        const char ch = s[i];
        if (i == 4 || i == 7) {
            if (ch != '-') return false;
        } else if (ch < '0' || ch > '9') {
            return false;
        }
    }
    return true;
}

/// True when the first character after leading spaces/tabs is '{' or '['.
bool is_json_text(const char* s) {
    const char* p = s;
    while (*p == ' ' || *p == '\t') ++p;
    return *p == '{' || *p == '[';
}

/// True when `s` is an optional sign followed by one or more decimal digits.
bool is_integer_text(const char* s) {
    const char* p = s;
    if (*p == '-' || *p == '+') ++p;
    if (*p == '\0') return false;          // empty, or a lone sign
    while (*p >= '0' && *p <= '9') ++p;
    return *p == '\0';                     // every remaining char was a digit
}

} // anon
/// Guesses the type of column `col` by sampling up to `sample_n` non-empty
/// cells, scanning rows from the top.
///
/// `cells` is read as a row-major grid of `rows` x `cols` C strings; null and
/// empty cells are skipped and do not count towards the sample. A type is
/// chosen only when every sampled cell matches it; a column mixing ints and
/// floats is Float. Anything else, an empty sample included, is String.
ColumnType auto_detect_type(const char* const* cells, int rows, int cols,
                            int col, int sample_n)
{
    if (col < 0 || col >= cols) return ColumnType::String;

    int sampled = 0;
    int bools = 0, dates = 0, jsons = 0, ints = 0, floats = 0;
    for (int row = 0; row < rows && sampled < sample_n; ++row) {
        const char* text = cells[row * cols + col];
        if (text == nullptr || *text == '\0') continue;
        ++sampled;

        double parsed;
        if (is_bool_text(text)) {
            ++bools;
        } else if (is_date_iso(text)) {
            ++dates;
        } else if (is_json_text(text)) {
            ++jsons;
        } else if (parse_number(text, parsed)) {
            if (is_integer_text(text)) ++ints;
            else                       ++floats;
        }
        // Plain text bumps no counter, which forces the String fallback below.
    }

    if (sampled == 0)     return ColumnType::String;
    if (bools == sampled) return ColumnType::Bool;
    if (dates == sampled) return ColumnType::Date;
    if (jsons == sampled) return ColumnType::Json;
    if (ints + floats == sampled) {
        return floats > 0 ? ColumnType::Float : ColumnType::Int;
    }
    return ColumnType::String;
}
/// Resolves the type to use for a column: an explicit declaration wins,
/// and ColumnType::Auto is replaced by the result of auto_detect_type().
ColumnType effective_type(ColumnType declared, const char* const* cells,
                          int rows, int cols, int col)
{
    return (declared == ColumnType::Auto)
               ? auto_detect_type(cells, rows, cols, col)
               : declared;
}
/// Parses `s` as a finite floating-point number.
///
/// Accepts whatever std::strtod accepts (leading whitespace, sign, decimal or
/// exponent notation, hex floats), optionally followed by spaces/tabs.
/// Returns false, leaving `out` untouched, when `s` is null or empty, when
/// other characters trail the number, or when the value is not finite.
///
/// Non-finite results ("nan", "inf", overflow such as "1e999") are rejected
/// on purpose: NaN breaks the ordering contract of the sort comparators that
/// use this function, and infinities poison sums and histogram bin math.
bool parse_number(const char* s, double& out) {
    if (!s || !*s) return false;
    char* end = nullptr;
    const double v = std::strtod(s, &end);
    if (end == s) return false;                   // no conversion performed
    while (*end == ' ' || *end == '\t') ++end;    // tolerate trailing blanks
    if (*end != '\0') return false;               // trailing garbage
    if (!std::isfinite(v)) return false;          // nan / inf / overflow
    out = v;
    return true;
}
/// Evaluates `a <op> b` for two cell texts (null is treated as "").
///
/// Contains / NotContains / StartsWith / EndsWith are always textual. The six
/// relational operators compare numerically when both sides parse as numbers
/// and fall back to std::strcmp ordering otherwise. Unknown operators yield
/// false.
bool compare(const char* a, const char* b, Op op) {
    const char* lhs = a ? a : "";
    const char* rhs = b ? b : "";

    // Text-only operators never attempt a numeric interpretation.
    if (op == Op::Contains)    return std::strstr(lhs, rhs) != nullptr;
    if (op == Op::NotContains) return std::strstr(lhs, rhs) == nullptr;
    if (op == Op::StartsWith)  return std::strncmp(lhs, rhs, std::strlen(rhs)) == 0;
    if (op == Op::EndsWith) {
        const size_t lhs_len = std::strlen(lhs);
        const size_t rhs_len = std::strlen(rhs);
        if (rhs_len > lhs_len) return false;
        return std::strcmp(lhs + lhs_len - rhs_len, rhs) == 0;
    }

    // One relational dispatcher shared by the numeric and the lexical path.
    const auto relate = [op](const auto& x, const auto& y) -> bool {
        switch (op) {
            case Op::Eq:  return x == y;
            case Op::Neq: return x != y;
            case Op::Gt:  return x > y;
            case Op::Gte: return x >= y;
            case Op::Lt:  return x < y;
            case Op::Lte: return x <= y;
            default:      return false;
        }
    };

    double lhs_num, rhs_num;
    if (parse_number(lhs, lhs_num) && parse_number(rhs, rhs_num)) {
        return relate(lhs_num, rhs_num);
    }
    // Lexical: compare the strcmp result against zero.
    return relate(std::strcmp(lhs, rhs), 0);
}
// State helpers for stage access.

/// Guarantees that at least one stage exists and that `active_stage` is a
/// valid index into `stages`.
void State::ensure_stage0() {
    if (stages.empty()) stages.push_back(Stage{});
    const int last = (int)stages.size() - 1;
    if (active_stage < 0) {
        active_stage = 0;
    } else if (active_stage > last) {
        active_stage = last;
    }
}
/// Mutable access to stage 0, creating it first if the state has no stages.
Stage& State::raw() {
    ensure_stage0();
    return stages.front();
}
/// Read-only access to stage 0. When the state has no stages a shared,
/// per-thread default-constructed Stage is returned instead.
const Stage& State::raw() const {
    if (!stages.empty()) return stages.front();
    static thread_local Stage fallback;
    return fallback;
}
/// Mutable access to the active stage; normalizes `stages` / `active_stage`
/// via ensure_stage0() before indexing.
Stage& State::active() {
    ensure_stage0();
    Stage& current = stages[active_stage];
    return current;
}
/// Read-only access to the active stage without mutating the state.
/// An out-of-range `active_stage` is read as stage 0; with no stages at all a
/// per-thread default-constructed Stage is returned.
const Stage& State::active_const() const {
    if (stages.empty()) {
        static thread_local Stage fallback;
        return fallback;
    }
    const bool in_range = active_stage >= 0 && active_stage < (int)stages.size();
    return stages[in_range ? active_stage : 0];
}
// Compatibilidad: aplica filters + primer sort del stage 0 (Raw). Si el state
// no tiene stages, devuelve todas las filas sin filtrar. Util para tests y
// para el render path actual (que solo opera sobre Raw cuando no hay grouping).
std::vector<int> compute_visible_rows(const char* const* cells,
int rows, int cols,
const State& st)
{
std::vector<int> out;
out.reserve(rows);
const Stage& s = st.raw();
for (int r = 0; r < rows; ++r) {
bool keep = true;
for (const auto& f : s.filters) {
if (f.col < 0 || f.col >= cols) continue;
const char* cell = cells[r * cols + f.col];
if (!compare(cell, f.value.c_str(), f.op)) { keep = false; break; }
}
if (keep) out.push_back(r);
}
if (!s.sorts.empty()) {
// El stage 0 stores sorts as {col_name, desc}. Para compat, si el
// nombre es vacio o "@idx<N>", interpretamos como indice numerico.
const SortClause& sc0 = s.sorts.front();
int sc = -1;
// Permitir nombre numerico estilo "@idx<N>" o lookup posicional via
// primer caracter '@'. Sino, busqueda por header no posible aqui
// (no tenemos headers) — devuelve sin sort. Para compat de tests
// usamos nombre "@N" donde N es indice 0-based.
if (!sc0.col.empty() && sc0.col[0] == '@') {
sc = std::atoi(sc0.col.c_str() + 1);
}
bool desc = sc0.desc;
if (sc >= 0 && sc < cols) {
std::sort(out.begin(), out.end(), [&](int a, int b) {
const char* ca = cells[a * cols + sc];
const char* cb = cells[b * cols + sc];
if (!ca) ca = "";
if (!cb) cb = "";
double na, nb;
bool num = parse_number(ca, na) && parse_number(cb, nb);
int cmp;
if (num) cmp = (na < nb) ? -1 : (na > nb ? 1 : 0);
else cmp = std::strcmp(ca, cb);
return desc ? (cmp > 0) : (cmp < 0);
});
}
}
return out;
}
// Computes summary statistics for column `col` of a row-major `rows` x `cols`
// grid of C strings.
//
// When `indices` is non-null and `n_indices` > 0 only those row indices are
// visited (out-of-range entries are skipped); otherwise all `rows` rows are.
// `unique_cap` bounds the number of distinct values tracked (0 = unbounded).
// Returns a default-constructed ColStats when `col` is outside [0, cols).
ColStats compute_column_stats(const char* const* cells, int rows, int cols,
int col, int unique_cap,
const int* indices, int n_indices)
{
ColStats s;
if (col < 0 || col >= cols) return s;
// NOTE: a non-null `indices` with n_indices == 0 falls back to scanning every row.
bool use_idx = (indices != nullptr && n_indices > 0);
int n = use_idx ? n_indices : rows;
// `total` is the number of rows requested, including any skipped below for
// being out of range.
s.total = n;
std::unordered_map<std::string, int> counts;
if (unique_cap > 0) counts.reserve(std::min(unique_cap, n));
bool all_numeric = true;
std::vector<double> nums;
nums.reserve(n);
for (int i = 0; i < n; ++i) {
int r = use_idx ? indices[i] : i;
if (r < 0 || r >= rows) continue;
const char* c = cells[r * cols + col];
// Null/empty cells only bump empty_count; they do not affect all_numeric
// or the distinct-value counts.
if (!c || !*c) { s.empty_count++; continue; }
double v;
if (parse_number(c, v)) {
// First numeric value seeds min/max.
if (s.numeric_count == 0) { s.min = v; s.max = v; }
else {
if (v < s.min) s.min = v;
if (v > s.max) s.max = v;
}
s.sum += v;
s.numeric_count++;
nums.push_back(v);
} else {
all_numeric = false;
}
// Distinct-value counting: while under the cap any value may be inserted;
// once the cap is reached only already-known values are counted and the
// first unseen value flags the result as capped.
if (unique_cap == 0 || (int)counts.size() < unique_cap) {
counts[c]++;
} else {
auto it = counts.find(c);
if (it != counts.end()) it->second++;
else s.unique_capped = true;
}
}
s.unique_count = (int)counts.size();
// The column is numeric only if every non-empty cell parsed and there was at least one.
s.numeric = all_numeric && s.numeric_count > 0;
if (s.numeric_count > 0) s.mean = s.sum / s.numeric_count;
// Top 8 categories by descending count. The order among equal counts follows
// the unordered_map iteration order.
if (!counts.empty()) {
std::vector<std::pair<std::string,int>> v(counts.begin(), counts.end());
int topN = std::min<int>(8, (int)v.size());
std::partial_sort(v.begin(), v.begin() + topN, v.end(),
[](const auto& a, const auto& b){ return a.second > b.second; });
v.resize(topN);
s.top_categories = std::move(v);
}
// Quartiles and histogram are only produced for fully numeric columns.
if (s.numeric && !nums.empty()) {
std::sort(nums.begin(), nums.end());
// Percentile by linear interpolation between the two nearest ranks of the sorted values.
auto pct = [&](double p) {
double idx = p * (nums.size() - 1);
size_t lo = (size_t)idx;
size_t hi = std::min(lo + 1, nums.size() - 1);
double t = idx - lo;
return nums[lo] * (1.0 - t) + nums[hi] * t;
};
s.p25 = pct(0.25);
s.p50 = pct(0.50);
s.p75 = pct(0.75);
s.hist.assign(HIST_BINS, 0.0f);
double range = s.max - s.min;
if (range <= 0) {
// All values identical: put everything in the middle bin.
s.hist[HIST_BINS / 2] = (float)nums.size();
} else {
for (double v : nums) {
// Equal-width bins over [min, max]; v == max would land on HIST_BINS
// and is clamped into the last bin.
int b = (int)((v - s.min) / range * HIST_BINS);
if (b < 0) b = 0;
if (b >= HIST_BINS) b = HIST_BINS - 1;
s.hist[b] += 1.0f;
}
}
}
return s;
}
/// Moves column id `src` to the display slot currently held by column id
/// `dst` inside `st.col_order`. No-op when the ids are equal or either one is
/// missing from the order.
///
/// The destination slot is captured before the source is removed. Dragging
/// right (src before dst) therefore places src just after dst; dragging left
/// places it just before.
void reorder_column(State& st, int src, int dst) {
    if (src == dst) return;

    auto& order = st.col_order;
    const auto from = std::find(order.begin(), order.end(), src);
    const auto to   = std::find(order.begin(), order.end(), dst);
    if (from == order.end() || to == order.end()) return;

    const int moved = *from;
    int slot = (int)(to - order.begin());   // captured before erase invalidates `to`
    order.erase(from);

    const int shrunk = (int)order.size();
    if (slot > shrunk) slot = shrunk;
    order.insert(order.begin() + slot, moved);
}
/// Escapes one CSV field. Null becomes an empty string. Text containing a
/// comma, double quote, CR or LF is wrapped in double quotes with embedded
/// quotes doubled; any other text is returned unchanged.
std::string csv_escape(const char* s) {
    if (s == nullptr) return std::string();
    if (std::strpbrk(s, ",\"\n\r") == nullptr) return std::string(s);

    std::string quoted;
    quoted.reserve(std::strlen(s) + 4);
    quoted.push_back('"');
    for (const char* p = s; *p != '\0'; ++p) {
        if (*p == '"') quoted.push_back('"');   // double embedded quotes
        quoted.push_back(*p);
    }
    quoted.push_back('"');
    return quoted;
}
namespace {

/// Copies `s`, replacing tab, LF and CR with a space so the text cannot break
/// the TSV grid. Null yields an empty string.
std::string tsv_sanitize(const char* s) {
    std::string out;
    if (!s) return out;
    out.reserve(std::strlen(s));
    for (const char* p = s; *p; ++p) {
        char ch = *p;
        if (ch == '\t' || ch == '\n' || ch == '\r') ch = ' ';
        out += ch;
    }
    return out;
}

} // anon

/// Serializes a rectangular selection of the view as TSV (header line first,
/// '\n' line endings).
///
/// `cells` is a row-major `rows` x `cols` grid and `headers` holds `cols`
/// names. The selection is given in VIEW coordinates: row bounds index into
/// `visible_rows`, column bounds index into `col_order`. Bounds may be passed
/// in either order and are clamped to the valid range. Columns that are
/// hidden (per `col_visible`) or outside [0, cols) are omitted, and entries of
/// `visible_rows` outside [0, rows) are skipped instead of being read out of
/// bounds. Returns "" when `col_order` or `visible_rows` is empty.
std::string build_tsv(const char* const* cells, int rows, int cols,
                      const char* const* headers,
                      const std::vector<int>& col_order,
                      const std::vector<bool>& col_visible,
                      const std::vector<int>& visible_rows,
                      int view_row_lo, int view_row_hi,
                      int view_col_lo, int view_col_hi)
{
    if (col_order.empty() || visible_rows.empty()) return "";

    const int rmin = std::max(0, std::min(view_row_lo, view_row_hi));
    const int rmax = std::min((int)visible_rows.size() - 1,
                              std::max(view_row_lo, view_row_hi));
    const int cmin = std::max(0, std::min(view_col_lo, view_col_hi));
    const int cmax = std::min((int)col_order.size() - 1,
                              std::max(view_col_lo, view_col_hi));

    // Resolve the exported source columns once instead of re-testing per row.
    std::vector<int> picked;
    for (int oc = cmin; oc <= cmax; ++oc) {
        const int c = col_order[oc];
        if (c < 0 || c >= cols) continue;
        if (c < (int)col_visible.size() && !col_visible[c]) continue;
        picked.push_back(c);
    }

    std::string out;
    for (size_t i = 0; i < picked.size(); ++i) {
        if (i > 0) out += '\t';
        out += tsv_sanitize(headers[picked[i]]);
    }
    out += '\n';

    for (int ri = rmin; ri <= rmax; ++ri) {
        const int r = visible_rows[ri];
        if (r < 0 || r >= rows) continue;   // stale or invalid row index
        const char* const* row = cells + (size_t)r * (size_t)cols;
        for (size_t i = 0; i < picked.size(); ++i) {
            if (i > 0) out += '\t';
            out += tsv_sanitize(row[picked[i]]);
        }
        out += '\n';
    }
    return out;
}
/// Serializes the whole view as CSV (header line first, '\n' line endings).
///
/// `cells` is a row-major `rows` x `cols` grid and `headers` holds `cols`
/// names. Columns are emitted in `col_order`, omitting those that are hidden
/// (per `col_visible`) or outside [0, cols). Rows are emitted in the order of
/// `visible_rows`; entries outside [0, rows) are skipped instead of being
/// read out of bounds. Every field goes through csv_escape().
/// Returns "" when `col_order` is empty.
std::string build_csv(const char* const* cells, int rows, int cols,
                      const char* const* headers,
                      const std::vector<int>& col_order,
                      const std::vector<bool>& col_visible,
                      const std::vector<int>& visible_rows)
{
    if (col_order.empty()) return "";

    // Resolve the exported source columns once instead of re-testing per row.
    std::vector<int> picked;
    picked.reserve(col_order.size());
    for (int c : col_order) {
        if (c < 0 || c >= cols) continue;
        if (c < (int)col_visible.size() && !col_visible[c]) continue;
        picked.push_back(c);
    }

    std::string out;
    for (size_t i = 0; i < picked.size(); ++i) {
        if (i > 0) out += ',';
        out += csv_escape(headers[picked[i]]);
    }
    out += '\n';

    for (int r : visible_rows) {
        if (r < 0 || r >= rows) continue;   // stale or invalid row index
        const char* const* row = cells + (size_t)r * (size_t)cols;
        for (size_t i = 0; i < picked.size(); ++i) {
            if (i > 0) out += ',';
            out += csv_escape(row[picked[i]]);
        }
        out += '\n';
    }
    return out;
}
/// Looks backwards from `cursor` for an unclosed '[' on the current line.
///
/// On success returns the index of the '[' and stores the text between it and
/// the cursor in `filter_text`. Returns -1 (with `filter_text` empty) when
/// `buf` is null, `cursor` is outside (0, len], or a ']' or newline is met
/// before any '['.
int find_open_bracket(const char* buf, int len, int cursor, std::string& filter_text) {
    filter_text.clear();
    const bool cursor_ok = (buf != nullptr) && cursor > 0 && cursor <= len;
    if (!cursor_ok) return -1;

    int pos = cursor;
    while (pos-- > 0) {
        const char ch = buf[pos];
        if (ch == '[') {
            filter_text.assign(buf + pos + 1, cursor - pos - 1);
            return pos;
        }
        if (ch == ']' || ch == '\n') break;   // bracket already closed, or line start
    }
    return -1;
}
/// Replaces the range [start, cursor) of `src` with "[name]" and returns the
/// new text; `new_cursor` is set just past the inserted reference.
/// When the range is invalid (`start` negative, `cursor` before `start`, or
/// either beyond the end of `src`) the text is returned unchanged and
/// `new_cursor` is set to `cursor`.
std::string insert_column_ref(const std::string& src, int start, int cursor,
                              const std::string& name, int& new_cursor)
{
    const int size = (int)src.size();
    const bool range_ok = start >= 0 && start <= cursor && cursor <= size;
    if (!range_ok) {
        new_cursor = cursor;
        return src;
    }

    const size_t ref_len = name.size() + 2;   // '[' + name + ']'
    std::string result;
    result.reserve(src.size() - (cursor - start) + ref_len);
    result.append(src, 0, start);
    result.push_back('[');
    result.append(name);
    result.push_back(']');
    result.append(src, cursor, std::string::npos);

    new_cursor = start + (int)ref_len;
    return result;
}
// ----------------------------------------------------------------------------
// TQL stage compute
// ----------------------------------------------------------------------------
/// Lower-case token for an aggregation function; "?" for a value outside the enum.
const char* agg_fn_name(AggFn f) {
    const char* name = "?";
    switch (f) {
        case AggFn::Count:      name = "count";      break;
        case AggFn::Sum:        name = "sum";        break;
        case AggFn::Avg:        name = "avg";        break;
        case AggFn::Min:        name = "min";        break;
        case AggFn::Max:        name = "max";        break;
        case AggFn::Distinct:   name = "distinct";   break;
        case AggFn::Stddev:     name = "stddev";     break;
        case AggFn::Median:     name = "median";     break;
        case AggFn::P25:        name = "p25";        break;
        case AggFn::P75:        name = "p75";        break;
        case AggFn::P90:        name = "p90";        break;
        case AggFn::P99:        name = "p99";        break;
        case AggFn::Percentile: name = "percentile"; break;
    }
    return name;
}
/// Output column name for an aggregation.
///
/// An explicit `a.alias` wins. Otherwise Count is "count", Percentile is
/// "p<NN>_<col>" where NN is `a.arg` * 100 rounded to the nearest integer,
/// and every other function is "<fn>_<col>".
///
/// The name is assembled with std::string, so long column names are never
/// truncated (a fixed-size snprintf buffer would cut them off).
std::string aggregation_alias(const Aggregation& a) {
    if (!a.alias.empty()) return a.alias;
    if (a.fn == AggFn::Count) return "count";

    std::string out;
    if (a.fn == AggFn::Percentile) {
        const int pct = (int)(a.arg * 100.0 + 0.5);
        out = "p";
        out += std::to_string(pct);
    } else {
        out = agg_fn_name(a.fn);
    }
    out += '_';
    out += a.col;
    return out;
}
/// Column type produced by an aggregation.
/// Count and Distinct yield Int. Min and Max keep the type of the input column
/// named `a.col` (String when it cannot be resolved). Everything else is Float.
ColumnType aggregation_type(const Aggregation& a,
                            const std::vector<std::string>& in_headers,
                            const std::vector<ColumnType>& in_types)
{
    switch (a.fn) {
        case AggFn::Count:
        case AggFn::Distinct:
            return ColumnType::Int;
        case AggFn::Min:
        case AggFn::Max: {
            // Only headers that have a matching entry in in_types can resolve.
            const size_t limit = std::min(in_headers.size(), in_types.size());
            for (size_t idx = 0; idx < limit; ++idx) {
                if (in_headers[idx] == a.col) return in_types[idx];
            }
            return ColumnType::String;
        }
        default:
            return ColumnType::Float;
    }
}
/// Builds the equality filter `column[col_idx] == value` used by a drill step.
Filter make_drill_filter(int col_idx, const std::string& value) {
    Filter drill;
    drill.op    = Op::Eq;
    drill.col   = col_idx;
    drill.value = value;
    return drill;
}
/// Applies a drill step: inserts `step.added` at `step.filter_pos` in the
/// filters of stage `step.target_stage` and makes that stage active.
/// Returns false, changing nothing, when the stage index or insert position
/// is out of range.
bool apply_drill_step(State& st, const DrillStep& step) {
    const int stage_idx = step.target_stage;
    const bool stage_ok = stage_idx >= 0 && stage_idx < (int)st.stages.size();
    if (!stage_ok) return false;

    auto& filters = st.stages[stage_idx].filters;
    const int at = step.filter_pos;
    if (at < 0 || at > (int)filters.size()) return false;

    filters.insert(filters.begin() + at, step.added);
    st.active_stage = stage_idx;
    return true;
}
/// Moves the active stage one step towards stage 0.
/// Returns false when there are no stages or stage 0 is already active.
bool drill_up(State& st) {
    const bool can_go_up = !st.stages.empty() && st.active_stage > 0;
    if (can_go_up) --st.active_stage;
    return can_go_up;
}
/// Serializes one row as two TSV lines (headers, then values), each ending in
/// "\r\n".
///
/// `cells` is a row-major `rows` x `cols` grid. Columns without a header
/// (c >= headers.size()) and null cells produce empty fields. Tab, LF and CR
/// inside a header or value are replaced by a space so an embedded separator
/// cannot shift columns or split the line, matching what build_tsv does for
/// selections. Returns "" when `cells` is null, `row_idx` is outside
/// [0, rows) or `cols` <= 0.
std::string row_to_tsv(const char* const* cells, int rows, int cols,
                       int row_idx, const std::vector<std::string>& headers) {
    if (!cells || row_idx < 0 || row_idx >= rows || cols <= 0) return "";

    std::string out;
    // Appends `len` bytes of `text` with field/line separators neutralized.
    const auto append_field = [&out](const char* text, size_t len) {
        for (size_t i = 0; i < len; ++i) {
            const char ch = text[i];
            out += (ch == '\t' || ch == '\n' || ch == '\r') ? ' ' : ch;
        }
    };

    for (int c = 0; c < cols; ++c) {
        if (c > 0) out += '\t';
        if (c < (int)headers.size()) append_field(headers[c].data(), headers[c].size());
    }
    out += "\r\n";

    const char* const* row = cells + (size_t)row_idx * (size_t)cols;
    for (int c = 0; c < cols; ++c) {
        if (c > 0) out += '\t';
        if (const char* v = row[c]) append_field(v, std::strlen(v));
    }
    out += "\r\n";
    return out;
}
std::vector<Filter> build_filters_from_row(const char* const* cells, int rows,
int cols, int row_idx) {
std::vector<Filter> out;
if (row_idx < 0 || row_idx >= rows || cols <= 0) return out;
for (int c = 0; c < cols; ++c) {
const char* v = cells[row_idx * cols + c];
if (!v || !*v) continue;
Filter f;
f.col = c;
f.op = Op::Eq;
f.value = v;
out.push_back(f);
}
return out;
}
/// Reverts a drill step: removes the filter at `step.filter_pos` from stage
/// `step.target_stage` and restores `step.prev_active_stage` when that index
/// is valid. Returns false, changing nothing, when the stage index or filter
/// position is out of range.
bool undo_drill_step(State& st, const DrillStep& step) {
    const int n_stages = (int)st.stages.size();
    if (step.target_stage < 0 || step.target_stage >= n_stages) return false;

    auto& filters = st.stages[step.target_stage].filters;
    const int at = step.filter_pos;
    if (at < 0 || at >= (int)filters.size()) return false;
    filters.erase(filters.begin() + at);

    const int prev = step.prev_active_stage;
    if (prev >= 0 && prev < n_stages) st.active_stage = prev;
    return true;
}
std::vector<int> apply_filters(const char* const* cells, int rows, int cols,
const std::vector<Filter>& filters)
{
std::vector<int> out;
out.reserve(rows);
for (int r = 0; r < rows; ++r) {
bool keep = true;
for (const auto& f : filters) {
if (f.col < 0 || f.col >= cols) continue;
const char* cell = cells[r * cols + f.col];
if (!compare(cell, f.value.c_str(), f.op)) { keep = false; break; }
}
if (keep) out.push_back(r);
}
return out;
}
namespace {
/// Index of the first header equal to `name`, or -1 when there is none.
int find_col(const std::vector<std::string>& headers, const std::string& name) {
    const auto hit = std::find(headers.begin(), headers.end(), name);
    if (hit == headers.end()) return -1;
    return (int)(hit - headers.begin());
}
// Three-way comparison of two cells for sorting (null is treated as "").
// Returns <0, 0 or >0.
//
// Cells are ordered by class first: empty < number < other text. Numbers
// compare numerically, text compares with std::strcmp. Each cell is classified
// on its own so the result is a strict weak ordering, as std::sort requires.
// Choosing "numeric vs lexical" per pair is not one: "2" < "10" (numeric),
// "10" < "1a" and "1a" < "2" (lexical) form a cycle. NaN is classed as text
// because it does not order against other numbers.
int cmp_cells(const char* a, const char* b) {
    if (!a) a = "";
    if (!b) b = "";

    // 0 = empty, 1 = number, 2 = other text.
    const auto classify = [](const char* text, double& num) -> int {
        if (*text == '\0') return 0;
        return (parse_number(text, num) && !std::isnan(num)) ? 1 : 2;
    };

    double na = 0.0, nb = 0.0;
    const int ra = classify(a, na);
    const int rb = classify(b, nb);
    if (ra != rb) return (ra < rb) ? -1 : 1;
    if (ra == 1)  return (na < nb) ? -1 : (na > nb ? 1 : 0);
    return std::strcmp(a, b);
}
void apply_sorts(std::vector<int>& row_idx,
const char* const* cells, int cols,
const std::vector<std::string>& headers,
const std::vector<SortClause>& sorts)
{
if (sorts.empty()) return;
std::vector<int> sort_cols(sorts.size());
for (size_t i = 0; i < sorts.size(); ++i) sort_cols[i] = find_col(headers, sorts[i].col);
std::sort(row_idx.begin(), row_idx.end(), [&](int a, int b){
for (size_t i = 0; i < sorts.size(); ++i) {
int sc = sort_cols[i];
if (sc < 0) continue;
int c = cmp_cells(cells[a * cols + sc], cells[b * cols + sc]);
if (c != 0) return sorts[i].desc ? (c > 0) : (c < 0);
}
return false;
});
}
/// Percentile of `v` by linear interpolation between the two nearest ranks.
///
/// `p` is a fraction in [0, 1]; values outside that range (and NaN) are
/// clamped, so an out-of-range request can no longer index past the ends of
/// the data. `v` is sorted in place as a side effect. Returns 0.0 for an
/// empty vector.
double percentile_value(std::vector<double>& v, double p) {
    if (v.empty()) return 0.0;
    std::sort(v.begin(), v.end());

    if (!(p > 0.0))   p = 0.0;   // also catches NaN
    else if (p > 1.0) p = 1.0;

    const double idx = p * (v.size() - 1);
    const size_t lo = (size_t)idx;
    const size_t hi = std::min(lo + 1, v.size() - 1);
    const double t = idx - lo;
    return v[lo] * (1.0 - t) + v[hi] * t;
}
/// Evaluates a numeric aggregation over `vals`.
///
/// Returns 0.0 for an empty input and for functions not handled here.
/// Stddev is the population standard deviation (divides by N). The percentile
/// family delegates to percentile_value(), which sorts `vals` in place; `arg`
/// is only used by AggFn::Percentile.
double compute_agg_numeric(AggFn fn, std::vector<double>& vals, double arg) {
    if (vals.empty()) return 0.0;

    const auto total = [&vals]() {
        double acc = 0;
        for (double x : vals) acc += x;
        return acc;
    };

    switch (fn) {
        case AggFn::Sum:
            return total();
        case AggFn::Avg:
            return total() / vals.size();
        case AggFn::Min:
            return *std::min_element(vals.begin(), vals.end());
        case AggFn::Max:
            return *std::max_element(vals.begin(), vals.end());
        case AggFn::Stddev: {
            const double mean = total() / vals.size();
            double sq_dev = 0;
            for (double x : vals) {
                const double d = x - mean;
                sq_dev += d * d;
            }
            return std::sqrt(sq_dev / vals.size());
        }
        // Fixed percentiles only choose the fraction; the shared call is below.
        case AggFn::Median:     arg = 0.50; break;
        case AggFn::P25:        arg = 0.25; break;
        case AggFn::P75:        arg = 0.75; break;
        case AggFn::P90:        arg = 0.90; break;
        case AggFn::P99:        arg = 0.99; break;
        case AggFn::Percentile: break;
        default:
            return 0.0;
    }
    return percentile_value(vals, arg);
}
/// Formats a double for display: integral values print without a decimal
/// point ("%lld"), everything else with four significant digits ("%.4g").
///
/// The integer path is only taken when `v` lies inside the long long range.
/// Converting an out-of-range or non-finite double to an integer is undefined
/// behaviour, so such values (1e30, inf, nan) go straight to "%.4g".
std::string format_double(double v) {
    char buf[64];
    // +/-2^63 as doubles; NaN fails both comparisons.
    const bool fits_ll = (v >= -9223372036854775808.0) && (v < 9223372036854775808.0);
    if (fits_ll) {
        const long long iv = (long long)v;
        if ((double)iv == v) {
            std::snprintf(buf, sizeof(buf), "%lld", iv);
            return buf;
        }
    }
    std::snprintf(buf, sizeof(buf), "%.4g", v);
    return buf;
}
} // anon
// Evaluates one pipeline stage over an input table and returns the resulting table.
//
// `in_cells` is a row-major `in_rows` x `in_cols` grid of C strings, described
// by `in_headers` / `in_types`. The stage's filters are applied first. Then:
//  - with no breakouts and no aggregations, the filtered rows are sorted by
//    stage.sorts and passed through with the same columns;
//  - otherwise the filtered rows are grouped by the breakout columns and the
//    output has one row per group: breakout values followed by one value per
//    aggregation, sorted by stage.sorts (looked up in the OUTPUT headers).
//
// The returned `cells` are raw pointers: they point either into `in_cells`
// (which must therefore outlive the result) or into `out.cell_backing`.
StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
const std::vector<std::string>& in_headers,
const std::vector<ColumnType>& in_types,
const Stage& stage)
{
StageOutput out;
auto visible = apply_filters(in_cells, in_rows, in_cols, stage.filters);
bool grouped = !stage.breakouts.empty() || !stage.aggregations.empty();
if (!grouped) {
// Passthrough: same shape, filtered + sorted.
out.cols = in_cols;
out.headers = in_headers;
out.types = in_types;
// Sort the visible row indices.
apply_sorts(visible, in_cells, in_cols, in_headers, stage.sorts);
out.rows = (int)visible.size();
out.cells.reserve((size_t)out.rows * in_cols);
for (int r : visible) {
for (int c = 0; c < in_cols; ++c) out.cells.push_back(in_cells[r * in_cols + c]);
}
return out;
}
// Grouped: group the visible rows by their breakout values and compute the aggregations.
// Breakouts may carry a `:granularity` suffix for Date columns (phase 10).
int nbreaks = (int)stage.breakouts.size();
std::vector<int> break_cols(nbreaks);
std::vector<DateGranularity> break_grans(nbreaks);
bool any_trunc = false;
for (int i = 0; i < nbreaks; ++i) {
std::string col_name;
break_grans[i] = parse_breakout_granularity(stage.breakouts[i], col_name);
if (break_grans[i] != DateGranularity::None) any_trunc = true;
// -1 when the breakout names a column that is not in in_headers.
break_cols[i] = find_col(in_headers, col_name);
}
// Pre-truncate only when some granularity is active. The strings are kept in
// out.cell_backing so the pointers survive the function return.
// Capacity is reserved up front so push_back never invalidates earlier pointers.
// Size = truncated cells + aggregation cells (worst case n_groups <= in_rows).
out.cell_backing.reserve(
(size_t)in_rows * (size_t)nbreaks +
(size_t)in_rows * stage.aggregations.size() + 16);
std::vector<const char*> trunc_ptrs;
if (any_trunc) {
// Indexed [row * nbreaks + breakout]; entries stay null for breakouts
// without granularity or with an unresolved column.
trunc_ptrs.assign((size_t)in_rows * (size_t)nbreaks, nullptr);
for (int r = 0; r < in_rows; ++r) {
for (int i = 0; i < nbreaks; ++i) {
if (break_grans[i] == DateGranularity::None) continue;
int bc = break_cols[i];
if (bc < 0) continue;
const char* v = in_cells[r * in_cols + bc];
out.cell_backing.emplace_back(
truncate_date(v ? v : "", break_grans[i]));
trunc_ptrs[(size_t)r * nbreaks + i] = out.cell_backing.back().c_str();
}
}
}
// Value of breakout `i` for input row `r`: "" for an unresolved column, the
// truncated date when a granularity applies, otherwise the raw cell (null -> "").
auto cell_for = [&](int r, int i) -> const char* {
int bc = break_cols[i];
if (bc < 0) return "";
if (break_grans[i] != DateGranularity::None) {
return trunc_ptrs[(size_t)r * nbreaks + i];
}
const char* v = in_cells[r * in_cols + bc];
return v ? v : "";
};
// Group key: the breakout values joined with a separator byte.
auto make_key = [&](int r) -> std::string {
std::string k;
for (int i = 0; i < nbreaks; ++i) {
if (i > 0) k += '\x1f'; // unit separator; assumed not to occur in the data
k += cell_for(r, i);
}
return k;
};
// Groups are kept in order of first appearance for stability before sorting.
std::unordered_map<std::string, int> key_to_group;
std::vector<std::string> group_keys; // canonical keys; not used except for debugging
std::vector<std::vector<int>> group_rows; // in_cells row indices per group
std::vector<std::vector<const char*>> group_breakvals; // breakout values per group
for (int r : visible) {
std::string k = make_key(r);
auto it = key_to_group.find(k);
int gi;
if (it == key_to_group.end()) {
// First row of a new group: record its breakout values.
gi = (int)group_rows.size();
key_to_group.emplace(k, gi);
group_keys.push_back(k);
group_rows.emplace_back();
std::vector<const char*> bv((size_t)nbreaks, "");
for (int i = 0; i < nbreaks; ++i) {
bv[i] = cell_for(r, i);
}
group_breakvals.push_back(std::move(bv));
} else gi = it->second;
group_rows[gi].push_back(r);
}
// Output headers + types: breakouts first, then aggregation aliases.
int out_cols = (int)stage.breakouts.size() + (int)stage.aggregations.size();
out.cols = out_cols;
out.headers.reserve(out_cols);
out.types.reserve(out_cols);
for (int i = 0; i < nbreaks; ++i) {
// The header keeps the breakout text as written in the stage (granularity suffix included).
out.headers.push_back(stage.breakouts[i]);
int bc = break_cols[i];
// With an active granularity the output is a String (ymd or similar format),
// not the original date.
ColumnType ot = ColumnType::String;
if (break_grans[i] == DateGranularity::None
&& bc >= 0 && bc < (int)in_types.size()) {
ot = in_types[bc];
}
out.types.push_back(ot);
}
for (const auto& a : stage.aggregations) {
out.headers.push_back(aggregation_alias(a));
out.types.push_back(aggregation_type(a, in_headers, in_types));
}
// Compute the aggregation values per group. Backing capacity is reserved so the
// .c_str() pointers are not invalidated. This request does not exceed the
// up-front reservation above, because n_groups <= in_rows.
int n_groups = (int)group_rows.size();
out.cell_backing.reserve((size_t)n_groups * stage.aggregations.size() + 16);
// Stores a formatted value in out.cell_backing and returns a pointer to it.
auto store_backing = [&](const std::string& s) -> const char* {
out.cell_backing.push_back(s);
return out.cell_backing.back().c_str();
};
// Build the cells per group (rows not sorted yet).
std::vector<const char*> flat;
flat.reserve((size_t)n_groups * out_cols);
for (int gi = 0; gi < n_groups; ++gi) {
// Breakout values: pointers into in_cells or, for truncated dates, into
// out.cell_backing (both stable).
for (size_t i = 0; i < stage.breakouts.size(); ++i) {
flat.push_back(group_breakvals[gi][i]);
}
// Aggregations
for (const auto& a : stage.aggregations) {
// Count: number of rows in the group; needs no column.
if (a.fn == AggFn::Count) {
flat.push_back(store_backing(format_double((double)group_rows[gi].size())));
continue;
}
// Distinct: number of different non-empty values; "0" if the column is unknown.
if (a.fn == AggFn::Distinct) {
int ac = find_col(in_headers, a.col);
if (ac < 0) { flat.push_back(store_backing("0")); continue; }
std::unordered_set<std::string> uniq;
for (int r : group_rows[gi]) {
const char* v = in_cells[r * in_cols + ac];
if (v && *v) uniq.insert(v);
}
flat.push_back(store_backing(format_double((double)uniq.size())));
continue;
}
// Every other aggregation yields an empty cell when its column is unknown.
int ac = find_col(in_headers, a.col);
if (ac < 0) { flat.push_back(store_backing("")); continue; }
// Min/max over String or Date columns compares text and keeps the original cell.
if ((a.fn == AggFn::Min || a.fn == AggFn::Max) &&
ac < (int)in_types.size() &&
(in_types[ac] == ColumnType::String || in_types[ac] == ColumnType::Date))
{
const char* best = nullptr;
for (int r : group_rows[gi]) {
const char* v = in_cells[r * in_cols + ac];
if (!v || !*v) continue;
if (!best) { best = v; continue; }
int c = std::strcmp(v, best);
if ((a.fn == AggFn::Min && c < 0) || (a.fn == AggFn::Max && c > 0)) best = v;
}
flat.push_back(best ? best : store_backing(""));
continue;
}
// Numeric aggregation over the cells that parse as numbers; the rest are ignored.
std::vector<double> vals;
vals.reserve(group_rows[gi].size());
for (int r : group_rows[gi]) {
const char* v = in_cells[r * in_cols + ac];
if (!v || !*v) continue;
double d;
if (parse_number(v, d)) vals.push_back(d);
}
double agg_val = compute_agg_numeric(a.fn, vals, a.arg);
flat.push_back(store_backing(format_double(agg_val)));
}
}
// Sort the n_groups rows by stage.sorts (column names are looked up in out.headers).
std::vector<int> grp_idx(n_groups);
for (int i = 0; i < n_groups; ++i) grp_idx[i] = i;
apply_sorts(grp_idx, flat.data(), out_cols, out.headers, stage.sorts);
out.rows = n_groups;
out.cells.reserve((size_t)n_groups * out_cols);
// Emit the group rows in sorted order.
for (int gi : grp_idx) {
for (int c = 0; c < out_cols; ++c) {
out.cells.push_back(flat[gi * out_cols + c]);
}
}
return out;
}
// ----------------------------------------------------------------------------
// ViewMode helpers
// ----------------------------------------------------------------------------
// Static descriptor of one view mode; one entry per mode lives in kViewModes.
struct ViewModeInfo {
ViewMode m; // the mode this entry describes
const char* token; // identifier returned by view_mode_token() and matched by view_mode_from_token()
const char* label; // display name returned by view_mode_label()
int min_cols; // value returned by view_mode_min_cols()
bool needs_num; // value returned by view_mode_needs_numeric()
bool needs_cat; // value returned by view_mode_needs_category()
bool needs_agg; // value returned by view_mode_needs_aggregation()
};
// Registry of the view modes known to this file. The view_mode_* helpers scan
// it linearly, and all_view_modes() exposes the modes in this order.
// Columns: mode, token, label, min_cols, needs_num, needs_cat, needs_agg.
static const ViewModeInfo kViewModes[] = {
{ ViewMode::Table, "table", "Table", 1, false, false, false },
{ ViewMode::Bar, "bar", "Bar (horizontal)", 2, true, true, true },
{ ViewMode::Column, "column", "Column (vertical)", 2, true, true, true },
{ ViewMode::GroupedBar, "grouped_bar", "Grouped bar", 2, true, true, true },
{ ViewMode::StackedBar, "stacked_bar", "Stacked bar", 2, true, true, true },
{ ViewMode::Line, "line", "Line", 1, true, false, false },
{ ViewMode::Area, "area", "Area", 1, true, false, false },
{ ViewMode::Stairs, "stairs", "Stairs", 1, true, false, false },
{ ViewMode::Scatter, "scatter", "Scatter", 2, true, false, false },
{ ViewMode::Bubble, "bubble", "Bubble", 3, true, false, false },
{ ViewMode::Histogram, "histogram", "Histogram", 1, true, false, false },
{ ViewMode::Histogram2D, "hist2d", "Histogram 2D", 2, true, false, false },
{ ViewMode::Heatmap, "heatmap", "Heatmap", 1, true, false, false },
{ ViewMode::BoxPlot, "boxplot", "Box plot", 2, true, true, false },
{ ViewMode::Stem, "stem", "Stem", 1, true, false, false },
{ ViewMode::ErrorBars, "errorbars", "Error bars", 2, true, false, false },
{ ViewMode::Pie, "pie", "Pie", 2, true, true, true },
{ ViewMode::Donut, "donut", "Donut", 2, true, true, true },
{ ViewMode::Funnel, "funnel", "Funnel", 2, true, true, true },
{ ViewMode::Waterfall, "waterfall", "Waterfall", 1, true, false, true },
{ ViewMode::KPI, "kpi", "KPI (single)", 1, true, false, true },
{ ViewMode::KPIGrid, "kpi_grid", "KPI grid", 1, true, false, true },
{ ViewMode::Candlestick, "candlestick", "Candlestick (OHLC)", 4, true, false, false },
{ ViewMode::Radar, "radar", "Radar", 2, true, true, false },
};
// Number of entries in kViewModes.
static const int kViewModesN = (int)(sizeof(kViewModes) / sizeof(kViewModes[0]));
/// Token for a view mode; "table" when the mode is not in kViewModes.
const char* view_mode_token(ViewMode m) {
    for (const ViewModeInfo& info : kViewModes) {
        if (info.m == m) return info.token;
    }
    return "table";
}
/// Display label for a view mode; "Table" when the mode is not in kViewModes.
const char* view_mode_label(ViewMode m) {
    for (const ViewModeInfo& info : kViewModes) {
        if (info.m == m) return info.label;
    }
    return "Table";
}
/// Parses a view-mode token (exact, case-sensitive match).
/// Null or unknown tokens map to ViewMode::Table.
ViewMode view_mode_from_token(const char* s) {
    if (s != nullptr) {
        for (const ViewModeInfo& info : kViewModes) {
            if (std::strcmp(info.token, s) == 0) return info.m;
        }
    }
    return ViewMode::Table;
}
/// The `min_cols` entry registered for a view mode; 1 when the mode is not in kViewModes.
int view_mode_min_cols(ViewMode m) {
    for (const ViewModeInfo& info : kViewModes) {
        if (info.m == m) return info.min_cols;
    }
    return 1;
}
/// The `needs_num` flag registered for a view mode; false when the mode is not in kViewModes.
bool view_mode_needs_numeric(ViewMode m) {
    for (const ViewModeInfo& info : kViewModes) {
        if (info.m == m) return info.needs_num;
    }
    return false;
}
/// The `needs_cat` flag registered for a view mode; false when the mode is not in kViewModes.
bool view_mode_needs_category(ViewMode m) {
    for (const ViewModeInfo& info : kViewModes) {
        if (info.m == m) return info.needs_cat;
    }
    return false;
}
/// The `needs_agg` flag registered for a view mode; false when the mode is not in kViewModes.
bool view_mode_needs_aggregation(ViewMode m) {
    for (const ViewModeInfo& info : kViewModes) {
        if (info.m == m) return info.needs_agg;
    }
    return false;
}
/// Returns a pointer to a static array with every view mode, in kViewModes
/// order, and stores the element count in `*n_out` when `n_out` is non-null.
///
/// The array is sized from kViewModes itself, so it cannot overflow when
/// modes are added to the table. It is filled once through a function-local
/// static, whose initialization the language guarantees to run exactly once.
const ViewMode* all_view_modes(int* n_out) {
    constexpr int kCount = (int)(sizeof(kViewModes) / sizeof(kViewModes[0]));
    struct Cache {
        ViewMode modes[kCount];
        Cache() {
            for (int i = 0; i < kCount; ++i) modes[i] = kViewModes[i].m;
        }
    };
    static const Cache cache;
    if (n_out) *n_out = kCount;
    return cache.modes;
}
// ----------------------------------------------------------------------------
// Joins
// ----------------------------------------------------------------------------
int resolve_main_idx(const std::vector<TableInput>& tables, const std::string& main_source) {
if (tables.empty()) return -1;
if (main_source.empty()) return 0;
for (size_t i = 0; i < tables.size(); ++i) {
if (tables[i].name == main_source) return (int)i;
}
return 0;
}
// Token string for a join strategy. Left, and any value not listed here,
// maps to "left".
const char* join_strategy_token(JoinStrategy s) {
    if (s == JoinStrategy::Inner) return "inner";
    if (s == JoinStrategy::Right) return "right";
    if (s == JoinStrategy::Full)  return "full";
    return "left";
}
// Parses a join strategy token (exact, case-sensitive). Null, "left" and any
// unrecognized token all yield JoinStrategy::Left.
JoinStrategy join_strategy_from_token(const char* s) {
    if (s == nullptr) return JoinStrategy::Left;

    static const struct {
        const char*  token;
        JoinStrategy strategy;
    } kNonDefault[] = {
        { "inner", JoinStrategy::Inner },
        { "right", JoinStrategy::Right },
        { "full",  JoinStrategy::Full  },
    };
    for (const auto& candidate : kNonDefault) {
        if (std::strcmp(s, candidate.token) == 0) {
            return candidate.strategy;
        }
    }
    return JoinStrategy::Left;
}
// Display label for a join strategy. Left, and any value not listed here,
// maps to "left-join".
const char* join_strategy_label(JoinStrategy s) {
    if (s == JoinStrategy::Inner) return "inner-join";
    if (s == JoinStrategy::Right) return "right-join";
    if (s == JoinStrategy::Full)  return "full-join";
    return "left-join";
}
namespace {

// Index of the first header equal to `name`, or -1 when none matches.
int find_col_idx(const std::vector<std::string>& hdrs, const std::string& name) {
    const auto hit = std::find(hdrs.begin(), hdrs.end(), name);
    return hit == hdrs.end() ? -1 : static_cast<int>(hit - hdrs.begin());
}

// Builds the composite join key for one row of a flat, row-major cell array.
// Each key column contributes its cell text (null cells count as "") followed
// by a 0x1F separator. A key column index outside [0, cols) contributes the
// fixed placeholder "\x1f|" instead of reading a cell.
std::string make_key(const char* const* cells, int row, int cols,
                     const std::vector<int>& key_cols) {
    std::string key;
    for (const int col : key_cols) {
        if (col >= 0 && col < cols) {
            const char* cell = cells[row * cols + col];
            if (cell != nullptr) key += cell;
            key += "\x1f";  // separator
        } else {
            key += "\x1f|";
        }
    }
    return key;
}

}  // namespace
// Joins a left table with `right` on the column pairs listed in `jn.on`.
//
// Inputs
//   left_cells    flat, row-major array of left_rows x left_cols cells; null
//                 cells are read as "".
//   left_headers, left_types
//                 per-column metadata for the left side; columns without an
//                 entry get "" / ColumnType::Auto in the output.
//   right         table joined against (cells, rows, cols, headers, types).
//   jn            join spec: `on` holds (left header, right header) pairs,
//                 `fields` the right headers to project (empty = every right
//                 column), `alias` an optional "alias." prefix for right
//                 headers, `strategy` the join kind.
//
// Output
//   Columns: every left column, then the projected right columns.
//   Rows: each left row is emitted once per matching right row, in left
//   order. Left rows with no match are kept (right side blank) for Left/Full.
//   Right rows that never matched are appended afterwards (left side blank)
//   for Right/Full. `out.cells` points into the strings of `out.cell_backing`.
//
// Fixes over the previous version:
//   - the right header lookup is bounds-checked, like the left header and the
//     type lookups already were (right.headers may be shorter than right.cols);
//   - a projected field whose header index is >= right.cols is dropped rather
//     than used to index past the end of a row.
StageOutput join_tables(const char* const* left_cells, int left_rows, int left_cols,
                        const std::vector<std::string>& left_headers,
                        const std::vector<ColumnType>& left_types,
                        const TableInput& right,
                        const Join& jn)
{
    StageOutput out;

    // Resolve key column indices on both sides (-1 when the header is absent;
    // make_key handles out-of-range indices).
    std::vector<int> lk_idx, rk_idx;
    for (const auto& p : jn.on) {
        lk_idx.push_back(find_col_idx(left_headers, p.first));
        rk_idx.push_back(find_col_idx(right.headers, p.second));
    }

    // Resolve which right columns are projected into the output.
    std::vector<int> right_fields;
    if (jn.fields.empty()) {
        for (int i = 0; i < right.cols; ++i) right_fields.push_back(i);
    } else {
        for (const auto& f : jn.fields) {
            const int i = find_col_idx(right.headers, f);
            // Unknown headers, and headers without a backing cell column, are skipped.
            if (i >= 0 && i < right.cols) right_fields.push_back(i);
        }
    }

    // Output headers + types: left columns, then (optionally aliased) right fields.
    out.cols = left_cols + (int)right_fields.size();
    out.headers.reserve(out.cols);
    out.types.reserve(out.cols);
    for (int c = 0; c < left_cols; ++c) {
        out.headers.push_back(c < (int)left_headers.size() ? left_headers[c] : "");
        out.types.push_back(c < (int)left_types.size() ? left_types[c] : ColumnType::Auto);
    }
    for (int rc : right_fields) {
        const std::string name =
            rc < (int)right.headers.size() ? right.headers[rc] : std::string();
        out.headers.push_back(jn.alias.empty() ? name : (jn.alias + "." + name));
        out.types.push_back(rc < (int)right.types.size() ? right.types[rc] : ColumnType::Auto);
    }

    // Hash right rows by key; each key keeps its row indices in table order.
    std::unordered_map<std::string, std::vector<int>> right_idx;
    right_idx.reserve(right.rows);
    for (int r = 0; r < right.rows; ++r) {
        right_idx[make_key(right.cells, r, right.cols, rk_idx)].push_back(r);
    }

    // Tracks which right rows were matched (needed for Right/Full).
    std::vector<bool> right_matched(right.rows, false);

    // Backing strings for the output cells (lower-bound estimate of the size).
    out.cell_backing.reserve((size_t)(left_rows + right.rows) * out.cols);

    auto append_left_row = [&](int lr) {
        for (int c = 0; c < left_cols; ++c) {
            const char* s = left_cells[lr * left_cols + c];
            out.cell_backing.emplace_back(s ? s : "");
        }
    };
    auto append_left_empty = [&]() {
        for (int c = 0; c < left_cols; ++c) out.cell_backing.emplace_back("");
    };
    auto append_right_row = [&](int rr) {
        for (int rc : right_fields) {
            const char* s = right.cells[rr * right.cols + rc];
            out.cell_backing.emplace_back(s ? s : "");
        }
    };
    auto append_right_empty = [&]() {
        for (size_t i = 0; i < right_fields.size(); ++i) out.cell_backing.emplace_back("");
    };

    const bool include_left = (jn.strategy == JoinStrategy::Left ||
                               jn.strategy == JoinStrategy::Inner ||
                               jn.strategy == JoinStrategy::Full);
    const bool keep_unmatched_left  = (jn.strategy == JoinStrategy::Left ||
                                       jn.strategy == JoinStrategy::Full);
    const bool keep_unmatched_right = (jn.strategy == JoinStrategy::Right ||
                                       jn.strategy == JoinStrategy::Full);

    int row_count = 0;

    // Pass 1: walk the left rows and emit every (left, right) match. A Right
    // join also needs this pass so that matched pairs are emitted and marked.
    if (include_left || jn.strategy == JoinStrategy::Right) {
        for (int lr = 0; lr < left_rows; ++lr) {
            const std::string k = make_key(left_cells, lr, left_cols, lk_idx);
            const auto it = right_idx.find(k);
            if (it == right_idx.end() || it->second.empty()) {
                if (keep_unmatched_left) {
                    append_left_row(lr);
                    append_right_empty();
                    ++row_count;
                }
                continue;
            }
            for (int rr : it->second) {
                append_left_row(lr);
                append_right_row(rr);
                right_matched[rr] = true;
                ++row_count;
            }
        }
    }

    // Pass 2: append right rows that no left row matched.
    if (keep_unmatched_right) {
        for (int rr = 0; rr < right.rows; ++rr) {
            if (right_matched[rr]) continue;
            append_left_empty();
            append_right_row(rr);
            ++row_count;
        }
    }
    out.rows = row_count;

    // Take the pointers only after cell_backing is final, so that vector
    // growth cannot invalidate them.
    out.cells.reserve(out.cell_backing.size());
    for (auto& s : out.cell_backing) out.cells.push_back(s.c_str());
    return out;
}
// ----------------------------------------------------------------------------
// Phase 10: extended drill — granularity + presets.
// ----------------------------------------------------------------------------
// Token string for a date granularity. Any value other than the five listed
// here (including DateGranularity::None) yields the empty string.
const char* date_granularity_token(DateGranularity g) {
    if (g == DateGranularity::Year)  return "year";
    if (g == DateGranularity::Month) return "month";
    if (g == DateGranularity::Week)  return "week";
    if (g == DateGranularity::Day)   return "day";
    if (g == DateGranularity::Hour)  return "hour";
    return "";
}
// Parses a granularity token (exact, case-sensitive). Null or unrecognized
// input yields DateGranularity::None.
DateGranularity date_granularity_from_token(const char* s) {
    if (s == nullptr) return DateGranularity::None;

    static const struct {
        const char*     token;
        DateGranularity granularity;
    } kTokens[] = {
        { "year",  DateGranularity::Year  },
        { "month", DateGranularity::Month },
        { "week",  DateGranularity::Week  },
        { "day",   DateGranularity::Day   },
        { "hour",  DateGranularity::Hour  },
    };
    for (const auto& candidate : kTokens) {
        if (std::strcmp(s, candidate.token) == 0) {
            return candidate.granularity;
        }
    }
    return DateGranularity::None;
}
// Splits a breakout spec of the form "column:granularity".
// The text after the LAST ':' is tried as a granularity token. When it is
// one, `col_out` receives everything before that ':' and the granularity is
// returned. Otherwise (no ':' or an unknown suffix) `col_out` receives the
// whole string and DateGranularity::None is returned.
DateGranularity parse_breakout_granularity(const std::string& breakout,
                                           std::string& col_out) {
    const std::string::size_type colon = breakout.rfind(':');
    DateGranularity parsed = DateGranularity::None;
    if (colon != std::string::npos) {
        parsed = date_granularity_from_token(breakout.c_str() + colon + 1);
    }
    if (parsed == DateGranularity::None) {
        col_out = breakout;
    } else {
        col_out = breakout.substr(0, colon);
    }
    return parsed;
}
// Builds a breakout spec: the bare column name for DateGranularity::None,
// otherwise "column:token" using date_granularity_token.
std::string compose_breakout(const std::string& col, DateGranularity g) {
    std::string spec = col;
    if (g != DateGranularity::None) {
        spec += ":";
        spec += date_granularity_token(g);
    }
    return spec;
}
// Index of the element of xs[0..n) closest to `target` by absolute
// difference. NaN elements are ignored; ties go to the lowest index.
// Returns -1 for a null array, n <= 0, or when every element is NaN.
int nearest_index_1d(double target, const double* xs, int n) {
    if (xs == nullptr || n <= 0) return -1;

    int winner = -1;
    double winner_dist = 0.0;
    for (const double* p = xs; p != xs + n; ++p) {
        if (std::isnan(*p)) continue;
        const double dist = std::fabs(*p - target);
        const bool improves = (winner < 0) || (dist < winner_dist);
        if (!improves) continue;
        winner = static_cast<int>(p - xs);
        winner_dist = dist;
    }
    return winner;
}
// Index of the point (xs[i], ys[i]) closest to (tx, ty) by squared Euclidean
// distance. Points with a NaN coordinate are ignored; ties go to the lowest
// index. Returns -1 for a null array, n <= 0, or when no point is usable.
int nearest_index_2d(double tx, double ty,
                     const double* xs, const double* ys, int n) {
    if (xs == nullptr || ys == nullptr || n <= 0) return -1;

    int winner = -1;
    double winner_dist2 = 0.0;
    for (int i = 0; i < n; ++i) {
        const double px = xs[i];
        const double py = ys[i];
        if (std::isnan(px) || std::isnan(py)) continue;
        const double off_x = px - tx;
        const double off_y = py - ty;
        const double dist2 = off_x * off_x + off_y * off_y;
        const bool improves = (winner < 0) || (dist2 < winner_dist2);
        if (improves) {
            winner = i;
            winner_dist2 = dist2;
        }
    }
    return winner;
}
// Angle of the point (mx, my) around the centre (cx, cy), in radians, in the
// half-open range [0, 2*PI).
//
// Convention (per the original note, for ImPlot pies): 0 is straight up
// (dx = 0, dy < 0) and the angle grows clockwise, assuming a y-down coordinate
// system. Standard atan2 measures 0 at +X counter-clockwise, hence the swapped
// and negated arguments atan2(dx, -dy).
//
// Fix: for a tiny negative atan2 result, `a + 2*PI` rounds to exactly 2*PI,
// which violates the documented range and is rejected by consumers that test
// `angle >= 2*PI`. That case now wraps to 0.
double pie_angle(double cx, double cy, double mx, double my) {
    const double two_pi = 6.283185307179586;
    const double dx = mx - cx;
    const double dy = my - cy;
    double a = std::atan2(dx, -dy);  // 0 when (dx = 0, dy < 0), i.e. top
    if (a < 0) a += two_pi;
    if (a >= two_pi) a = 0.0;        // rounding of (tiny negative + 2*PI)
    return a;
}
// Index of the pie slice that contains `angle`.
//
// `sums[0..n)` are the slice weights; slice i spans an arc proportional to
// sums[i] / total, laid out in index order starting at angle 0. `angle` must
// lie in [0, 2*PI).
//
// Returns -1 when: the array is null or n <= 0; `angle` is outside [0, 2*PI)
// or NaN; any weight is negative or NaN; or the total is not a positive
// finite number.
//
// Fixes over the previous version:
//   - NaN angles and NaN weights are rejected instead of slipping past the
//     comparisons and returning the last slice;
//   - if accumulated rounding leaves `angle` past the final boundary, the
//     last slice with a positive weight is returned rather than index n - 1,
//     which could be an empty slice.
int pie_slice_at_angle(double angle, const double* sums, int n) {
    if (n <= 0 || !sums) return -1;

    const double two_pi = 6.283185307179586;
    // Written as a negated conjunction so that NaN fails the test.
    if (!(angle >= 0.0 && angle < two_pi)) return -1;

    double total = 0.0;
    int last_positive = -1;
    for (int i = 0; i < n; ++i) {
        if (!(sums[i] >= 0.0)) return -1;  // negative or NaN weight
        if (sums[i] > 0.0) last_positive = i;
        total += sums[i];
    }
    if (!(total > 0.0) || !std::isfinite(total)) return -1;

    double cum = 0.0;
    for (int i = 0; i < n; ++i) {
        cum += (sums[i] / total) * two_pi;
        if (angle < cum) return i;
    }
    return last_positive;  // rounding edge case: angle just below 2*PI
}
// Maps a plot-space point (px, py) to the heatmap cell containing it, for a
// grid of `rows` x `cols` unit cells covering [0, cols) x [0, rows).
//
// On a hit, col_out = floor(px) and row_out = floor(py). On a miss (empty
// grid, point outside the grid, or a NaN coordinate) both outputs are -1.
//
// Original author's note: ImPlot draws heatmap row 0 at the top and the plot
// Y axis is usually inverted; this function returns the row in plot
// coordinates and leaves any flipping to the caller.
//
// Fix: NaN coordinates used to pass both range checks (every comparison with
// NaN is false) and were then cast to int, which is undefined behaviour. The
// range tests are now written so that NaN is rejected.
void heatmap_cell_at(double px, double py, int rows, int cols,
                     int& row_out, int& col_out) {
    row_out = -1;
    col_out = -1;
    if (rows <= 0 || cols <= 0) return;

    const bool x_inside = px >= 0.0 && px < static_cast<double>(cols);
    const bool y_inside = py >= 0.0 && py < static_cast<double>(rows);
    if (!x_inside || !y_inside) return;

    // Both values are non-negative here, so truncation equals floor.
    col_out = static_cast<int>(px);
    row_out = static_cast<int>(py);
}
// Finds the smallest and largest cell of one column of a flat, row-major
// table, comparing cells as strings (std::string ordering, not numeric).
// Null and empty cells are ignored. Both outputs are left empty when
// `col_idx` is out of range or the column has no non-empty cell.
void column_min_max(const char* const* cells, int rows, int cols, int col_idx,
                    std::string& min_out, std::string& max_out) {
    min_out.clear();
    max_out.clear();
    if (col_idx < 0 || col_idx >= cols) return;

    bool seen_any = false;
    for (int row = 0; row < rows; ++row) {
        const char* text = cells[row * cols + col_idx];
        if (text == nullptr || *text == '\0') continue;

        if (!seen_any) {
            // First usable cell seeds both extremes.
            seen_any = true;
            min_out = text;
            max_out = text;
            continue;
        }
        if (min_out.compare(text) > 0) min_out = text;
        if (max_out.compare(text) < 0) max_out = text;
    }
}
namespace {

// Parses the leading "YYYY-MM-DD" of an ISO-style string into (y, m, d).
// Anything after the tenth character is ignored. Returns false when the
// string is too short, a dash is missing, a digit position holds a non-digit,
// or the month/day are outside 1..12 / 1..31. The day is NOT checked against
// the month length, so e.g. "2024-02-31" is accepted.
// Note: y/m/d are written as soon as all eight digits are valid, even if the
// range check then fails.
bool parse_ymd(const std::string& s, int& y, int& m, int& d) {
    if (s.size() < 10) return false;
    if (s[4] != '-' || s[7] != '-') return false;

    static const int kDigitPos[] = {0, 1, 2, 3, 5, 6, 8, 9};
    int digit[10] = {0};
    for (const int pos : kDigitPos) {
        const char ch = s[static_cast<std::string::size_type>(pos)];
        if (ch < '0' || ch > '9') return false;
        digit[pos] = ch - '0';
    }

    y = digit[0] * 1000 + digit[1] * 100 + digit[2] * 10 + digit[3];
    m = digit[5] * 10 + digit[6];
    d = digit[8] * 10 + digit[9];
    return m >= 1 && m <= 12 && d >= 1 && d <= 31;
}

// Day number of a proleptic Gregorian date, counted from 0000-03-01 (that
// date maps to 0). This is the usual era-based civil-date computation without
// a Unix-epoch offset; 1970-01-01 maps to 719468.
long ymd_to_days(int y, int m, int d) {
    // Treat January/February as months 13/14 of the previous year so the leap
    // day falls at the end of the shifted year.
    const bool early = (m <= 2);
    const int sy = early ? y - 1 : y;
    const int sm = early ? m + 12 : m;

    const long era = (sy >= 0 ? sy : sy - 399) / 400;
    const unsigned year_of_era = static_cast<unsigned>(sy - era * 400);
    const unsigned day_of_year = static_cast<unsigned>((153 * (sm - 3) + 2) / 5 + d - 1);
    const unsigned day_of_era =
        year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    return era * 146097 + static_cast<long>(day_of_era);
}

// Inverse of ymd_to_days: converts a day number (0 == 0000-03-01) back to a
// proleptic Gregorian (y, m, d).
void days_to_ymd(long days, int& y, int& m, int& d) {
    const long era = (days >= 0 ? days : days - 146096) / 146097;
    const unsigned day_of_era = static_cast<unsigned>(days - era * 146097);
    const unsigned year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    const unsigned day_of_year =
        day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    const unsigned shifted_month = (5 * day_of_year + 2) / 153;  // 0 == March
    const unsigned day = day_of_year - (153 * shifted_month + 2) / 5 + 1;
    const unsigned month = shifted_month < 10 ? shifted_month + 3 : shifted_month - 9;

    int year = static_cast<int>(year_of_era) + static_cast<int>(era) * 400;
    if (month <= 2) year += 1;  // Jan/Feb belong to the next civil year

    y = year;
    m = static_cast<int>(month);
    d = static_cast<int>(day);
}

}  // namespace
// Truncates an ISO-style date string to the bucket given by `g`:
//   Year  -> "YYYY"
//   Month -> "YYYY-MM"
//   Week  -> "YYYY-MM-DD" of the Monday on or before the date
//   Day   -> "YYYY-MM-DD"
//   Hour  -> "YYYY-MM-DDTHH"; the hour is read from "...THH" at offsets 10-12
//            and falls back to 00 when absent, malformed or greater than 23
// The input is returned unchanged for DateGranularity::None, for any other
// granularity value, and when the string does not start with a parseable
// "YYYY-MM-DD".
std::string truncate_date(const std::string& date, DateGranularity g) {
    if (g == DateGranularity::None) return date;

    int year = 0, month = 0, day = 0;
    if (!parse_ymd(date, year, month, day)) return date;

    char text[32];
    if (g == DateGranularity::Year) {
        std::snprintf(text, sizeof(text), "%04d", year);
        return text;
    }
    if (g == DateGranularity::Month) {
        std::snprintf(text, sizeof(text), "%04d-%02d", year, month);
        return text;
    }
    if (g == DateGranularity::Day) {
        std::snprintf(text, sizeof(text), "%04d-%02d-%02d", year, month, day);
        return text;
    }
    if (g == DateGranularity::Hour) {
        const bool has_hour = date.size() >= 13 && date[10] == 'T'
                           && date[11] >= '0' && date[11] <= '9'
                           && date[12] >= '0' && date[12] <= '9';
        int hour = 0;
        if (has_hour) {
            hour = (date[11] - '0') * 10 + (date[12] - '0');
            if (hour > 23) hour = 0;
        }
        std::snprintf(text, sizeof(text), "%04d-%02d-%02dT%02d", year, month, day, hour);
        return text;
    }
    if (g == DateGranularity::Week) {
        // ymd_to_days counts from 0000-03-01, a Wednesday, so
        // day_number % 7 is: 0=Wed, 1=Thu, 2=Fri, 3=Sat, 4=Sun, 5=Mon, 6=Tue.
        // Days elapsed since Monday is therefore (weekday - 5) mod 7,
        // i.e. (weekday + 2) % 7.
        const long day_number = ymd_to_days(year, month, day);
        const int weekday = static_cast<int>(((day_number % 7) + 7) % 7);
        const int since_monday = (weekday + 2) % 7;
        int wy = 0, wm = 0, wd = 0;
        days_to_ymd(day_number - since_monday, wy, wm, wd);
        std::snprintf(text, sizeof(text), "%04d-%02d-%02d", wy, wm, wd);
        return text;
    }
    return date;
}
// Chooses a bucketing granularity from the span between two "YYYY-MM-DD"
// dates (order does not matter; the absolute difference is used):
//   more than 730 days -> Year
//   more than 60 days  -> Month
//   more than 14 days  -> Week
//   otherwise          -> Day
// Returns Day when either date fails to parse.
DateGranularity auto_date_granularity(const std::string& min_ymd,
                                      const std::string& max_ymd) {
    int lo_y, lo_m, lo_d;
    int hi_y, hi_m, hi_d;
    if (!parse_ymd(min_ymd, lo_y, lo_m, lo_d) ||
        !parse_ymd(max_ymd, hi_y, hi_m, hi_d)) {
        return DateGranularity::Day;
    }

    const long lo = ymd_to_days(lo_y, lo_m, lo_d);
    const long hi = ymd_to_days(hi_y, hi_m, hi_d);
    const long span = (hi >= lo) ? (hi - lo) : (lo - hi);

    if (span <= 14)  return DateGranularity::Day;
    if (span <= 60)  return DateGranularity::Week;
    if (span <= 730) return DateGranularity::Month;
    return DateGranularity::Year;  // more than ~2 years
}
// Display label for a filter preset; "?" for a value not listed here.
const char* filter_preset_label(FilterPreset p) {
    if (p == FilterPreset::Last7d)       return "Last 7 days";
    if (p == FilterPreset::Last30d)      return "Last 30 days";
    if (p == FilterPreset::Last90d)      return "Last 90 days";
    if (p == FilterPreset::ExcludeNulls) return "Exclude nulls";
    if (p == FilterPreset::NonZero)      return "Non-zero only";
    return "?";
}
// Expands a preset into concrete filters on column `col`:
//   Last7d / Last30d / Last90d -> one filter `col >= (today - N days)`, with
//       the cutoff formatted as "YYYY-MM-DD"; nothing is emitted when
//       `today_ymd` does not parse.
//   ExcludeNulls -> one filter `col != ""`.
//   NonZero      -> two filters, `col != ""` and `col != "0"`.
// Any other preset value yields an empty list.
std::vector<Filter> build_preset_filters(FilterPreset preset, int col,
                                         const std::string& today_ymd) {
    std::vector<Filter> out;

    // Appends one filter on `col` with the given operator and value.
    const auto add = [&out, col](Op op, const char* value) {
        Filter f;
        f.col = col;
        f.op = op;
        f.value = value;
        out.push_back(f);
    };

    // Appends `col >= today - days_back`, if today's date is parseable.
    const auto add_since = [&](int days_back) {
        int ty, tm, td;
        if (!parse_ymd(today_ymd, ty, tm, td)) return;
        const long cutoff = ymd_to_days(ty, tm, td) - days_back;
        int cy, cm, cd;
        days_to_ymd(cutoff, cy, cm, cd);
        char text[16];
        std::snprintf(text, sizeof(text), "%04d-%02d-%02d", cy, cm, cd);
        add(Op::Gte, text);
    };

    switch (preset) {
    case FilterPreset::Last7d:
        add_since(7);
        break;
    case FilterPreset::Last30d:
        add_since(30);
        break;
    case FilterPreset::Last90d:
        add_since(90);
        break;
    case FilterPreset::ExcludeNulls:
        add(Op::Neq, "");
        break;
    case FilterPreset::NonZero:
        add(Op::Neq, "");
        add(Op::Neq, "0");
        break;
    }
    return out;
}
} // namespace data_table