chore: auto-commit (12 archivos)

- playground/tables/CMakeLists.txt
- playground/tables/data_table.cpp
- playground/tables/data_table_logic.cpp
- playground/tables/data_table_logic.h
- playground/tables/self_test.cpp
- playground/tables/tql.cpp
- playground/tables/viz.cpp
- playground/tables/viz.h
- playground/tables/llm_anthropic.cpp
- playground/tables/llm_anthropic.h
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-13 00:50:35 +02:00
parent d782d463cb
commit 100aeaa1fc
12 changed files with 3040 additions and 291 deletions
+405 -15
View File
@@ -567,6 +567,69 @@ Filter make_drill_filter(int col_idx, const std::string& value) {
return f;
}
// Applies a drill step to `st`: inserts `step.added` into the filter list of
// stage `step.target_stage` at index `step.filter_pos`, then makes that stage
// the active one.
// Returns false, leaving `st` untouched, when the stage index is out of range
// or the insertion index lies outside [0, filters.size()].
bool apply_drill_step(State& st, const DrillStep& step) {
    const int stage_count = (int)st.stages.size();
    const bool stage_ok = step.target_stage >= 0 && step.target_stage < stage_count;
    if (!stage_ok) return false;

    Stage& target = st.stages[step.target_stage];
    const int insert_at = step.filter_pos;
    const int filter_count = (int)target.filters.size();
    // insert_at == filter_count is allowed: it appends at the end.
    if (insert_at < 0 || insert_at > filter_count) return false;

    target.filters.insert(target.filters.begin() + insert_at, step.added);
    st.active_stage = step.target_stage;
    return true;
}
// Moves the active stage one position towards the first stage.
// Returns true when the move happened; false when there are no stages or the
// active stage index is already <= 0 (in which case `st` is not modified).
bool drill_up(State& st) {
    const bool can_move = !st.stages.empty() && st.active_stage > 0;
    if (can_move) {
        --st.active_stage;
    }
    return can_move;
}
// Serializes one table row as two tab-separated lines: a header line followed
// by the row's values, each terminated by "\r\n".
//
//   cells    row-major array of rows*cols cell pointers; null cells are
//            written as empty fields.
//   rows     number of rows in `cells`.
//   cols     number of columns in `cells`.
//   row_idx  index of the row to serialize.
//   headers  column names; columns beyond headers.size() get an empty header.
//
// Returns an empty string when `cells` is null, `row_idx` is out of range or
// `cols` is not positive. Field contents are copied verbatim (no escaping of
// embedded tabs or line breaks).
std::string row_to_tsv(const char* const* cells, int rows, int cols,
                       int row_idx, const std::vector<std::string>& headers) {
    if (!cells || row_idx < 0 || row_idx >= rows || cols <= 0) return "";

    // Offset computed in size_t so that row_idx * cols cannot overflow int on
    // large tables.
    const char* const* row = cells + (size_t)row_idx * (size_t)cols;

    std::string out;
    for (int c = 0; c < cols; ++c) {
        if (c > 0) out += '\t';
        if (c < (int)headers.size()) out += headers[c];
    }
    out += "\r\n";
    for (int c = 0; c < cols; ++c) {
        if (c > 0) out += '\t';
        if (row[c]) out += row[c];
    }
    out += "\r\n";
    return out;
}
std::vector<Filter> build_filters_from_row(const char* const* cells, int rows,
int cols, int row_idx) {
std::vector<Filter> out;
if (row_idx < 0 || row_idx >= rows || cols <= 0) return out;
for (int c = 0; c < cols; ++c) {
const char* v = cells[row_idx * cols + c];
if (!v || !*v) continue;
Filter f;
f.col = c;
f.op = Op::Eq;
f.value = v;
out.push_back(f);
}
return out;
}
// Reverts a drill step: erases the filter at index `step.filter_pos` from
// stage `step.target_stage` and, when `step.prev_active_stage` is a valid
// stage index, restores it as the active stage.
// Returns false, leaving `st` untouched, when the stage index or the filter
// index is out of range.
bool undo_drill_step(State& st, const DrillStep& step) {
    const int stage_count = (int)st.stages.size();
    if (step.target_stage < 0 || step.target_stage >= stage_count) return false;

    auto& filters = st.stages[step.target_stage].filters;
    const int victim = step.filter_pos;
    if (victim < 0 || victim >= (int)filters.size()) return false;
    filters.erase(filters.begin() + victim);

    const int restore = step.prev_active_stage;
    if (restore >= 0 && restore < stage_count) {
        st.active_stage = restore;
    }
    return true;
}
std::vector<int> apply_filters(const char* const* cells, int rows, int cols,
const std::vector<Filter>& filters)
{
@@ -696,19 +759,57 @@ StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
}
// Grouped: agrupa visible por valores de breakout, calcula aggregations.
std::vector<int> break_cols(stage.breakouts.size());
for (size_t i = 0; i < stage.breakouts.size(); ++i) {
break_cols[i] = find_col(in_headers, stage.breakouts[i]);
// Breakouts pueden llevar sufijo `:granularity` para cols Date (fase 10).
int nbreaks = (int)stage.breakouts.size();
std::vector<int> break_cols(nbreaks);
std::vector<DateGranularity> break_grans(nbreaks);
bool any_trunc = false;
for (int i = 0; i < nbreaks; ++i) {
std::string col_name;
break_grans[i] = parse_breakout_granularity(stage.breakouts[i], col_name);
if (break_grans[i] != DateGranularity::None) any_trunc = true;
break_cols[i] = find_col(in_headers, col_name);
}
// Pre-truncate solo cuando hay granularity activa. Strings persistidos en
// out.cell_backing para que los punteros sobrevivan al return de la funcion.
// Reservamos upfront para que push_back no invalide punteros anteriores.
// Tamaño = trunc cells + aggregation cells (peor caso n_groups <= in_rows).
out.cell_backing.reserve(
(size_t)in_rows * (size_t)nbreaks +
(size_t)in_rows * stage.aggregations.size() + 16);
std::vector<const char*> trunc_ptrs;
if (any_trunc) {
trunc_ptrs.assign((size_t)in_rows * (size_t)nbreaks, nullptr);
for (int r = 0; r < in_rows; ++r) {
for (int i = 0; i < nbreaks; ++i) {
if (break_grans[i] == DateGranularity::None) continue;
int bc = break_cols[i];
if (bc < 0) continue;
const char* v = in_cells[r * in_cols + bc];
out.cell_backing.emplace_back(
truncate_date(v ? v : "", break_grans[i]));
trunc_ptrs[(size_t)r * nbreaks + i] = out.cell_backing.back().c_str();
}
}
}
auto cell_for = [&](int r, int i) -> const char* {
int bc = break_cols[i];
if (bc < 0) return "";
if (break_grans[i] != DateGranularity::None) {
return trunc_ptrs[(size_t)r * nbreaks + i];
}
const char* v = in_cells[r * in_cols + bc];
return v ? v : "";
};
auto make_key = [&](int r) -> std::string {
std::string k;
for (size_t i = 0; i < break_cols.size(); ++i) {
for (int i = 0; i < nbreaks; ++i) {
if (i > 0) k += '\x1f'; // separador unit-separator (no aparece en datos)
int bc = break_cols[i];
if (bc < 0) continue;
const char* v = in_cells[r * in_cols + bc];
k += (v ? v : "");
k += cell_for(r, i);
}
return k;
};
@@ -727,10 +828,9 @@ StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
key_to_group.emplace(k, gi);
group_keys.push_back(k);
group_rows.emplace_back();
std::vector<const char*> bv(break_cols.size(), "");
for (size_t i = 0; i < break_cols.size(); ++i) {
int bc = break_cols[i];
bv[i] = (bc >= 0) ? in_cells[r * in_cols + bc] : "";
std::vector<const char*> bv((size_t)nbreaks, "");
for (int i = 0; i < nbreaks; ++i) {
bv[i] = cell_for(r, i);
}
group_breakvals.push_back(std::move(bv));
} else gi = it->second;
@@ -742,11 +842,17 @@ StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
out.cols = out_cols;
out.headers.reserve(out_cols);
out.types.reserve(out_cols);
for (size_t i = 0; i < stage.breakouts.size(); ++i) {
for (int i = 0; i < nbreaks; ++i) {
out.headers.push_back(stage.breakouts[i]);
int bc = break_cols[i];
out.types.push_back((bc >= 0 && bc < (int)in_types.size())
? in_types[bc] : ColumnType::String);
// Si hay granularity activa, el output es String (formato ymd o similar),
// no la fecha original.
ColumnType ot = ColumnType::String;
if (break_grans[i] == DateGranularity::None
&& bc >= 0 && bc < (int)in_types.size()) {
ot = in_types[bc];
}
out.types.push_back(ot);
}
for (const auto& a : stage.aggregations) {
out.headers.push_back(aggregation_alias(a));
@@ -1102,4 +1208,288 @@ StageOutput join_tables(const char* const* left_cells, int left_rows, int left_c
return out;
}
// ----------------------------------------------------------------------------
// Fase 10: drill extendido — granularity + presets.
// ----------------------------------------------------------------------------
// Returns the lowercase token for a granularity ("year", "month", "week",
// "day", "hour"), or "" for any other value.
const char* date_granularity_token(DateGranularity g) {
    if (g == DateGranularity::Year)  return "year";
    if (g == DateGranularity::Month) return "month";
    if (g == DateGranularity::Week)  return "week";
    if (g == DateGranularity::Day)   return "day";
    if (g == DateGranularity::Hour)  return "hour";
    return "";
}
// Maps a token ("year", "month", "week", "day", "hour") to its granularity.
// The match is exact and case-sensitive; a null pointer or any other text
// yields DateGranularity::None.
DateGranularity date_granularity_from_token(const char* s) {
    if (s == nullptr) return DateGranularity::None;

    struct Entry {
        const char* token;
        DateGranularity value;
    };
    static const Entry kTable[] = {
        {"year",  DateGranularity::Year},
        {"month", DateGranularity::Month},
        {"week",  DateGranularity::Week},
        {"day",   DateGranularity::Day},
        {"hour",  DateGranularity::Hour},
    };

    const std::string wanted(s);
    for (const Entry& entry : kTable) {
        if (wanted == entry.token) return entry.value;
    }
    return DateGranularity::None;
}
// Splits a breakout spec of the form "column:granularity".
// Only the text after the LAST ':' is considered; when it is a recognised
// granularity token, `col_out` receives the part before that ':' and the
// granularity is returned. Otherwise (no ':' at all, or an unrecognised
// suffix) `col_out` receives the whole spec and DateGranularity::None is
// returned.
DateGranularity parse_breakout_granularity(const std::string& breakout,
                                           std::string& col_out) {
    const std::string::size_type colon = breakout.rfind(':');

    DateGranularity parsed = DateGranularity::None;
    if (colon != std::string::npos) {
        parsed = date_granularity_from_token(breakout.substr(colon + 1).c_str());
    }

    if (parsed == DateGranularity::None) {
        col_out = breakout;
    } else {
        col_out = breakout.substr(0, colon);
    }
    return parsed;
}
// Builds a breakout spec: the column name alone for DateGranularity::None,
// otherwise "column:token" using the token from date_granularity_token().
std::string compose_breakout(const std::string& col, DateGranularity g) {
    std::string spec = col;
    if (g != DateGranularity::None) {
        spec += ":";
        spec += date_granularity_token(g);
    }
    return spec;
}
// Returns the index of the element of xs[0..n) closest to `target` by
// absolute difference. NaN elements are skipped; on ties the lowest index
// wins. Returns -1 when `xs` is null, `n` is not positive, or every element
// is NaN.
int nearest_index_1d(double target, const double* xs, int n) {
    if (!xs || n <= 0) return -1;

    int winner = -1;
    double winner_dist = 0.0;
    for (int idx = 0; idx < n; ++idx) {
        const double candidate = xs[idx];
        if (std::isnan(candidate)) continue;

        const double dist = std::fabs(candidate - target);
        const bool improves = (winner < 0) || (dist < winner_dist);
        if (improves) {
            winner = idx;
            winner_dist = dist;
        }
    }
    return winner;
}
// Returns the index of the point (xs[i], ys[i]) closest to (tx, ty) by
// squared Euclidean distance. Points with a NaN coordinate are skipped; on
// ties the lowest index wins. Returns -1 when either array is null, `n` is
// not positive, or no point is usable.
int nearest_index_2d(double tx, double ty,
                     const double* xs, const double* ys, int n) {
    if (!xs || !ys || n <= 0) return -1;

    int winner = -1;
    double winner_dist2 = 0.0;
    for (int idx = 0; idx < n; ++idx) {
        const double px = xs[idx];
        const double py = ys[idx];
        if (std::isnan(px) || std::isnan(py)) continue;

        const double off_x = px - tx;
        const double off_y = py - ty;
        const double dist2 = off_x * off_x + off_y * off_y;
        if (winner < 0 || dist2 < winner_dist2) {
            winner = idx;
            winner_dist2 = dist2;
        }
    }
    return winner;
}
// Angle of the point (mx, my) around the centre (cx, cy), measured the way
// this file's pie hit-testing expects: 0 at the top (dx = 0, dy < 0, i.e.
// screen-style Y growing downwards) and increasing clockwise.
// The result is always in [0, 2*PI).
double pie_angle(double cx, double cy, double mx, double my) {
    const double two_pi = 6.283185307179586;
    const double dx = mx - cx;
    const double dy = my - cy;

    // Standard atan2 is 0 at +X and counter-clockwise; swapping the arguments
    // and negating dy gives 0 at the top, clockwise positive, in (-PI, PI].
    double a = std::atan2(dx, -dy);
    if (a < 0) a += two_pi;

    // A negative angle smaller than one ulp of 2*PI rounds up to exactly
    // 2*PI after the addition; wrap it so the half-open range holds and the
    // point is treated as being at the top rather than out of range.
    if (a >= two_pi) a = 0.0;
    return a;
}
// Finds which pie slice contains `angle`.
//
//   angle  angle in radians, expected in [0, 2*PI).
//   sums   n slice magnitudes; slice i spans a share of the full circle
//          proportional to sums[i] / total, laid out in index order starting
//          at angle 0.
//   n      number of slices.
//
// Returns the slice index, or -1 when the input is unusable: null or empty
// `sums`, an angle outside [0, 2*PI) or NaN, any negative or NaN sum, or a
// total that is not a positive finite number.
int pie_slice_at_angle(double angle, const double* sums, int n) {
    if (n <= 0 || !sums) return -1;

    const double two_pi = 6.283185307179586;
    // Written as a negated range test so that a NaN angle is rejected too.
    if (!(angle >= 0.0 && angle < two_pi)) return -1;

    double total = 0.0;
    for (int i = 0; i < n; ++i) {
        if (!(sums[i] >= 0.0)) return -1;  // negative or NaN
        total += sums[i];
    }
    if (!(total > 0.0) || !std::isfinite(total)) return -1;

    double cum = 0.0;
    int last_nonempty = -1;
    for (int i = 0; i < n; ++i) {
        if (sums[i] > 0.0) last_nonempty = i;
        cum += (sums[i] / total) * two_pi;
        if (angle < cum) return i;
    }
    // Rounding can leave the accumulated angle a hair below 2*PI; attribute
    // the remainder to the last slice that actually has an area.
    return last_nonempty;
}
// Maps a point (px, py) in heatmap plot coordinates to a cell.
// The grid is `cols` wide and `rows` tall with unit-sized cells, so the cell
// is simply (floor(py), floor(px)). No Y inversion is applied here; a caller
// that needs row 0 at the top of an inverted axis must flip the row itself.
//
// On success row_out/col_out receive the cell indices. When the grid is empty
// or the point lies outside [0, cols) x [0, rows) (including NaN coordinates)
// both outputs are set to -1.
void heatmap_cell_at(double px, double py, int rows, int cols,
                     int& row_out, int& col_out) {
    row_out = -1;
    col_out = -1;
    if (rows <= 0 || cols <= 0) return;

    // Positive range tests: a NaN coordinate fails them and is rejected
    // before the double -> int conversion, which would be undefined for NaN.
    const bool x_inside = px >= 0.0 && px < (double)cols;
    const bool y_inside = py >= 0.0 && py < (double)rows;
    if (!x_inside || !y_inside) return;

    // Coordinates are non-negative here, so truncation equals floor.
    col_out = (int)px;
    row_out = (int)py;
}
// Computes the lexicographically smallest and largest non-empty cell of one
// column.
//
//   cells    row-major array of rows*cols cell pointers.
//   col_idx  column to scan.
//   min_out  receives the smallest value (byte-wise string ordering).
//   max_out  receives the largest value.
//
// Null and empty cells are ignored. Both outputs are left empty when `cells`
// is null, `col_idx` is out of range, or the column has no non-empty cell.
// The comparison is purely textual, so it orders dates correctly only when
// they are stored in a sortable form such as ISO "YYYY-MM-DD".
void column_min_max(const char* const* cells, int rows, int cols, int col_idx,
                    std::string& min_out, std::string& max_out) {
    min_out.clear();
    max_out.clear();
    if (!cells || col_idx < 0 || col_idx >= cols) return;

    bool seen = false;
    for (int r = 0; r < rows; ++r) {
        // Index computed in size_t so r * cols cannot overflow int.
        const char* v = cells[(size_t)r * (size_t)cols + (size_t)col_idx];
        if (!v || !*v) continue;
        if (!seen) {
            min_out = v;
            max_out = v;
            seen = true;
            continue;
        }
        // Compare against the C string directly; no temporary std::string
        // is built per row.
        if (min_out > v) min_out = v;
        if (max_out < v) max_out = v;
    }
}
namespace {

// Parses the leading ISO "YYYY-MM-DD" of `s` into (y, m, d); anything after
// the tenth character is ignored. Returns true when the text has the right
// shape and 1 <= m <= 12, 1 <= d <= 31 (the day is NOT checked against the
// month length). The outputs are left untouched when the shape is wrong, but
// are already assigned when only the range check fails.
bool parse_ymd(const std::string& s, int& y, int& m, int& d) {
    if (s.size() < 10) return false;
    if (s[4] != '-' || s[7] != '-') return false;

    static const int kDigitPositions[] = {0, 1, 2, 3, 5, 6, 8, 9};
    for (int pos : kDigitPositions) {
        const char ch = s[(size_t)pos];
        if (!(ch >= '0' && ch <= '9')) return false;
    }

    auto digit = [&s](size_t pos) -> int { return s[pos] - '0'; };
    y = digit(0) * 1000 + digit(1) * 100 + digit(2) * 10 + digit(3);
    m = digit(5) * 10 + digit(6);
    d = digit(8) * 10 + digit(9);

    const bool month_ok = m >= 1 && m <= 12;
    const bool day_ok = d >= 1 && d <= 31;
    return month_ok && day_ok;
}

// Serial day number in the proleptic Gregorian calendar, counted from
// 0000-03-01 (that date maps to 0; 1970-01-01 maps to 719468).
long ymd_to_days(int y, int m, int d) {
    // Treat January and February as months 13 and 14 of the previous year so
    // the leap day falls at the end of the shifted year.
    if (m <= 2) {
        y -= 1;
        m += 12;
    }
    const long era = (y >= 0 ? y : y - 399) / 400;  // 400-year cycle index
    const unsigned year_of_era = (unsigned)(y - era * 400);
    const unsigned day_of_year = (unsigned)((153 * (m - 3) + 2) / 5 + d - 1);
    const unsigned day_of_era =
        year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    return era * 146097 + (long)day_of_era;
}

// Inverse of ymd_to_days: converts a serial day number (0 == 0000-03-01)
// back to a calendar date.
void days_to_ymd(long days, int& y, int& m, int& d) {
    const long era = (days >= 0 ? days : days - 146096) / 146097;
    const unsigned day_of_era = (unsigned)(days - era * 146097);
    const unsigned year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524
         - day_of_era / 146096) / 365;
    int year = (int)year_of_era + (int)era * 400;
    const unsigned day_of_year =
        day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    const unsigned shifted_month = (5 * day_of_year + 2) / 153;  // 0 == March
    const unsigned day = day_of_year - (153 * shifted_month + 2) / 5 + 1;
    const unsigned month = shifted_month < 10 ? shifted_month + 3
                                              : shifted_month - 9;
    // January/February belong to the next civil year of the shifted calendar.
    if (month <= 2) year += 1;
    y = year;
    m = (int)month;
    d = (int)day;
}

}  // anon
// Truncates an ISO date/datetime string to the given granularity.
//
//   date  text starting with "YYYY-MM-DD", optionally followed by a time part.
//   g     target granularity.
//
// Returns:
//   Year   -> "YYYY"
//   Month  -> "YYYY-MM"
//   Week   -> "YYYY-MM-DD" of the Monday of the week containing the date
//   Day    -> "YYYY-MM-DD"
//   Hour   -> "YYYY-MM-DDTHH"; HH is read from the two digits after a 'T' or
//             a space separator and falls back to 00 when the time part is
//             absent, malformed or above 23.
// The input is returned unchanged for DateGranularity::None, for any other
// granularity value, and when `date` does not parse as a date.
std::string truncate_date(const std::string& date, DateGranularity g) {
    if (g == DateGranularity::None) return date;
    int y, m, d;
    if (!parse_ymd(date, y, m, d)) return date;
    char buf[32];
    switch (g) {
        case DateGranularity::Year:
            std::snprintf(buf, sizeof(buf), "%04d", y);
            return buf;
        case DateGranularity::Month:
            std::snprintf(buf, sizeof(buf), "%04d-%02d", y, m);
            return buf;
        case DateGranularity::Day:
            std::snprintf(buf, sizeof(buf), "%04d-%02d-%02d", y, m, d);
            return buf;
        case DateGranularity::Hour: {
            int hh = 0;
            // Accept both "YYYY-MM-DDTHH..." and the common space-separated
            // "YYYY-MM-DD HH..." form; otherwise every space-separated
            // timestamp would collapse into the hour-00 bucket.
            const bool has_time = date.size() >= 13
                && (date[10] == 'T' || date[10] == ' ')
                && date[11] >= '0' && date[11] <= '9'
                && date[12] >= '0' && date[12] <= '9';
            if (has_time) {
                hh = (date[11] - '0') * 10 + (date[12] - '0');
                if (hh > 23) hh = 0;
            }
            std::snprintf(buf, sizeof(buf), "%04d-%02d-%02dT%02d", y, m, d, hh);
            return buf;
        }
        case DateGranularity::Week: {
            // ymd_to_days counts from 0000-03-01, which is a Wednesday, so
            // days % 7 is: 0=Wed, 1=Thu, 2=Fri, 3=Sat, 4=Sun, 5=Mon, 6=Tue.
            // Days elapsed since the preceding Monday: (mod - 5 + 7) % 7.
            long days = ymd_to_days(y, m, d);
            int mod = (int)(((days % 7) + 7) % 7);
            int since_monday = ((mod - 5) % 7 + 7) % 7;
            long monday = days - since_monday;
            int yy, mm, dd;
            days_to_ymd(monday, yy, mm, dd);
            std::snprintf(buf, sizeof(buf), "%04d-%02d-%02d", yy, mm, dd);
            return buf;
        }
        default:
            return date;
    }
}
// Picks a bucketing granularity from the span between two ISO dates
// ("YYYY-MM-DD..."). The order of the arguments does not matter; only the
// absolute number of days between them is used:
//   > 730 days -> Year, > 60 -> Month, > 14 -> Week, otherwise Day.
// Returns Day when either date fails to parse.
DateGranularity auto_date_granularity(const std::string& min_ymd,
                                      const std::string& max_ymd) {
    int lo_y, lo_m, lo_d;
    int hi_y, hi_m, hi_d;
    const bool parsed = parse_ymd(min_ymd, lo_y, lo_m, lo_d)
                     && parse_ymd(max_ymd, hi_y, hi_m, hi_d);
    if (!parsed) return DateGranularity::Day;

    const long lo_days = ymd_to_days(lo_y, lo_m, lo_d);
    const long hi_days = ymd_to_days(hi_y, hi_m, hi_d);
    const long span = hi_days >= lo_days ? hi_days - lo_days
                                         : lo_days - hi_days;

    DateGranularity picked = DateGranularity::Day;
    if (span > 730) {
        picked = DateGranularity::Year;   // more than two years
    } else if (span > 60) {
        picked = DateGranularity::Month;
    } else if (span > 14) {
        picked = DateGranularity::Week;
    }
    return picked;
}
// Returns the display label for a filter preset, or "?" for a value that is
// not one of the known presets.
const char* filter_preset_label(FilterPreset p) {
    if (p == FilterPreset::Last7d)       return "Last 7 days";
    if (p == FilterPreset::Last30d)      return "Last 30 days";
    if (p == FilterPreset::Last90d)      return "Last 90 days";
    if (p == FilterPreset::ExcludeNulls) return "Exclude nulls";
    if (p == FilterPreset::NonZero)      return "Non-zero only";
    return "?";
}
// Expands a filter preset into concrete filters on column `col`.
//
//   Last7d / Last30d / Last90d  one filter `col >= (today - N days)`, with
//                               the bound formatted as "YYYY-MM-DD"; nothing
//                               is produced when `today_ymd` does not parse.
//   ExcludeNulls                one filter `col != ""`.
//   NonZero                     two filters: `col != ""` and `col != "0"`.
//
// Any other preset value yields an empty vector.
std::vector<Filter> build_preset_filters(FilterPreset preset, int col,
                                         const std::string& today_ymd) {
    std::vector<Filter> filters;
    auto add = [&filters, col](Op op, const char* value) {
        Filter f;
        f.col = col;
        f.op = op;
        f.value = value;
        filters.push_back(f);
    };

    int window_days = 0;
    switch (preset) {
        case FilterPreset::Last7d:  window_days = 7;  break;
        case FilterPreset::Last30d: window_days = 30; break;
        case FilterPreset::Last90d: window_days = 90; break;
        case FilterPreset::ExcludeNulls:
            add(Op::Neq, "");
            return filters;
        case FilterPreset::NonZero:
            add(Op::Neq, "");
            add(Op::Neq, "0");
            return filters;
    }
    if (window_days == 0) return filters;  // not a known preset

    int y, m, d;
    if (!parse_ymd(today_ymd, y, m, d)) return filters;

    const long cutoff = ymd_to_days(y, m, d) - window_days;
    int cy, cm, cd;
    days_to_ymd(cutoff, cy, cm, cd);
    char text[16];
    std::snprintf(text, sizeof(text), "%04d-%02d-%02d", cy, cm, cd);
    add(Op::Gte, text);
    return filters;
}
} // namespace data_table