chore: auto-commit (286 archivos)

- .claude/agents/fn-orquestador/SKILL.md
- .claude/commands/fn_claude.md
- .claude/rules/INDEX.md
- .claude/rules/cpp_apps.md
- .claude/rules/ids_naming.md
- CHANGELOG.md
- apps/dag_engine/README.md
- apps/dag_engine/api.go
- apps/dag_engine/dags_migrated/example.yaml
- apps/dag_engine/dags_migrated/example_lineage_tracking.yaml
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 16:33:22 +02:00
parent 0b9af8f1bb
commit a03675113a
281 changed files with 12596 additions and 19526 deletions
+75
View File
@@ -0,0 +1,75 @@
// auto_detect_type — infiere ColumnType muestreando celdas de una columna.
// Promovido del playground tables (issue 0081-F). Pura — sin I/O ni estado.
#include "core/auto_detect_type.h"
#include <cstdlib>
#include <cstring>
namespace data_table {
// Returns true when s is exactly the literal "true" or "false" (case-sensitive,
// no surrounding whitespace allowed). s must be a valid NUL-terminated string;
// a null pointer is not checked here.
bool is_bool_text(const char* s) {
    static const char* const kLiterals[] = {"true", "false"};
    for (const char* literal : kLiterals) {
        if (std::strcmp(s, literal) == 0) return true;
    }
    return false;
}
// Returns true when the first ten characters of s have the shape YYYY-MM-DD
// (four digits, dash, two digits, dash, two digits). Anything after the tenth
// character is ignored and month/day ranges are not validated.
bool is_date_iso(const char* s) {
    // Shape template: '#' stands for a decimal digit, '-' for a literal dash.
    static const char kShape[] = "####-##-##";
    for (int i = 0; i < 10; ++i) {
        const char ch = s[i];
        if (ch == '\0') return false;  // shorter than the minimum of 10 characters
        if (kShape[i] == '#') {
            if (ch < '0' || ch > '9') return false;
        } else if (ch != '-') {
            return false;
        }
    }
    return true;
}
// Returns true when the first character of s after any leading spaces/tabs is
// '{' or '['. This is a cheap prefix check only; the text is not parsed as JSON.
bool is_json_text(const char* s) {
    const char* cursor = s;
    for (;;) {
        switch (*cursor) {
            case ' ':
            case '\t':
                ++cursor;
                continue;  // keep skipping leading blanks
            case '{':
            case '[':
                return true;
            default:
                return false;  // includes the terminating NUL
        }
    }
}
// Returns true when s is a non-empty run of decimal digits with an optional
// single leading '+' or '-'. No whitespace, exponent or decimal point allowed.
bool is_integer_text(const char* s) {
    const char* p = s;
    if (*p == '+' || *p == '-') ++p;
    if (*p == '\0') return false;            // empty, or a sign with no digits
    while (*p >= '0' && *p <= '9') ++p;      // consume the digit run
    return *p == '\0';                       // must have reached the end
}
// Parses s as a double via strtod. Succeeds only if at least one character was
// consumed and nothing but trailing spaces/tabs follows the number.
// On success writes the value to out and returns true; on failure out is left
// untouched and false is returned. Null and empty strings are rejected.
bool parse_number(const char* s, double& out) {
    if (s == nullptr || *s == '\0') return false;

    char* tail = nullptr;
    const double parsed = std::strtod(s, &tail);
    if (tail == s) return false;  // strtod consumed nothing

    // Tolerate trailing blanks, but nothing else.
    for (; *tail == ' ' || *tail == '\t'; ++tail) {}
    if (*tail != '\0') return false;

    out = parsed;
    return true;
}
// Infers the ColumnType of column `col` by classifying up to `sample_n`
// non-empty cells, scanned in row order from the top.
//   cells    row-major matrix of C-strings (rows x cols); null/empty cells are skipped
//   col      0-based column index; out of range yields ColumnType::String
//   sample_n maximum number of non-empty cells to classify
// A type is returned only when every sampled cell agrees on it. Int and Float
// cells may mix (the result is then Float). Anything else yields String.
ColumnType auto_detect_type(const char* const* cells, int rows, int cols,
                            int col, int sample_n)
{
    if (col < 0 || col >= cols) return ColumnType::String;

    int sampled = 0;
    int bools = 0, dates = 0, jsons = 0, ints = 0, floats = 0;

    for (int row = 0; row < rows; ++row) {
        if (sampled >= sample_n) break;

        const char* text = cells[row * cols + col];
        if (text == nullptr || *text == '\0') continue;
        ++sampled;

        // Classification order matters: bool, then date, then JSON, then number.
        double ignored;
        if (is_bool_text(text)) {
            ++bools;
        } else if (is_date_iso(text)) {
            ++dates;
        } else if (is_json_text(text)) {
            ++jsons;
        } else if (parse_number(text, ignored)) {
            if (is_integer_text(text)) ++ints;
            else ++floats;
        }
        // Plain text bumps no counter, which forces the String fallthrough below.
    }

    if (sampled == 0) return ColumnType::String;
    if (bools == sampled) return ColumnType::Bool;
    if (dates == sampled) return ColumnType::Date;
    if (jsons == sampled) return ColumnType::Json;

    const int numeric = ints + floats;
    if (numeric == sampled) {
        return (floats > 0) ? ColumnType::Float : ColumnType::Int;
    }
    return ColumnType::String;
}
} // namespace data_table
+30
View File
@@ -0,0 +1,30 @@
// auto_detect_type — infiere ColumnType muestreando celdas de una columna.
// Promovido del playground tables (issue 0081-F). Pura — sin I/O ni estado.
#pragma once
#include "core/data_table_types.h"
namespace data_table {
// Returns true if s is exactly the boolean literal "true" or "false"
// (case-sensitive). s must not be null.
bool is_bool_text(const char* s);
// Returns true if the first 10 characters of s have the ISO date shape
// YYYY-MM-DD. Trailing characters are ignored; month/day ranges are not checked.
bool is_date_iso(const char* s);
// Returns true if s starts with '{' or '[' (leading spaces/tabs are skipped).
bool is_json_text(const char* s);
// Returns true if s is a decimal integer with an optional leading sign.
bool is_integer_text(const char* s);
// Parses s as a double into out. Returns false if s is null, empty or not
// numeric; trailing spaces/tabs after the number are tolerated.
bool parse_number(const char* s, double& out);
// auto_detect_type: scans up to sample_n non-empty cells of column col in the
// cells matrix (row-major, rows x cols) and returns the inferred ColumnType.
// Returns ColumnType::String when there are no valid cells or the types are mixed.
ColumnType auto_detect_type(const char* const* cells, int rows, int cols,
int col, int sample_n = 64);
} // namespace data_table
+69
View File
@@ -0,0 +1,69 @@
---
name: auto_detect_type
kind: function
lang: cpp
domain: core
version: "1.0.0"
purity: pure
signature: "ColumnType auto_detect_type(const char* const* cells, int rows, int cols, int col, int sample_n = 64)"
description: "Infiere el ColumnType de una columna escaneando hasta sample_n celdas no-vacias. Detecta Int, Float, Bool, Date (ISO YYYY-MM-DD), Json ({/[) y String."
tags: [tables, stats, inference, type-detection, tql, cpp-tables]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: ["core/auto_detect_type.h"]
tested: true
tests:
- "auto_detect_type: columna numerica entera"
- "auto_detect_type: columna de texto libre"
- "auto_detect_type: columna de fechas ISO"
- "auto_detect_type: columna booleana"
- "auto_detect_type: columna float"
- "auto_detect_type: mezcla numerica y texto retorna String"
- "auto_detect_type: columna vacia retorna String"
- "auto_detect_type: sample_n=2 evalua solo las primeras 2 celdas no-vacias"
test_file_path: "cpp/tests/test_auto_detect_type.cpp"
file_path: "cpp/functions/core/auto_detect_type.cpp"
params:
- name: cells
desc: "Matriz row-major de punteros a C-strings (rows x cols). Puede contener nullptr para celdas nulas."
- name: rows
desc: "Numero de filas de la matriz."
- name: cols
desc: "Numero de columnas de la matriz."
- name: col
desc: "Indice de la columna a analizar (0-based)."
- name: sample_n
desc: "Maximo de celdas no-vacias a escanear. Default 64. Usar valor menor para columnas muy largas."
output: "ColumnType inferido: Int si todos son enteros, Float si hay decimales, Bool si 'true'/'false', Date si YYYY-MM-DD, Json si '{' o '[', String en cualquier otro caso o mezcla."
---
## Ejemplo
```cpp
#include "core/auto_detect_type.h"
// Columna de fechas: ["2024-01-01", "2024-06-15", "2023-12-31"]
const char* dates[] = {"2024-01-01", "2024-06-15", "2023-12-31"};
data_table::ColumnType t = data_table::auto_detect_type(dates, 3, 1, 0);
// t == ColumnType::Date
// Columna de enteros
const char* nums[] = {"1", "42", "100"};
t = data_table::auto_detect_type(nums, 3, 1, 0);
// t == ColumnType::Int
```
## Cuando usarla
Cuando recibas datos tabulares sin tipo declarado y necesites inferirlo antes de aplicar filtros numericos, formateo de celdas o estadisticas. Tipicamente se llama una vez al cargar o refrescar la fuente de datos.
## Gotchas
- Requiere que TODAS las celdas no-vacias muestreadas sean del mismo tipo para retornar ese tipo. Una sola celda texto en una columna de enteros devuelve `ColumnType::String`.
- Bool se evalua antes que Date y Number, por eso "true"/"false" no se confunde con texto.
- La deteccion de Date solo reconoce ISO 8601 (`YYYY-MM-DD`). Formatos locales (DD/MM/YYYY) caen en String.
- El muestreo no es aleatorio: se evaluan las primeras `sample_n` celdas no-vacias en orden de fila. Si los datos estan ordenados por tipo, aumentar `sample_n` para que la muestra sea representativa.
@@ -0,0 +1,94 @@
// compute_column_stats — estadisticas por columna (mean, p25/p50/p75, hist, top).
// Promovido del playground tables (issue 0081-F). Pura — sin I/O ni estado.
#include "core/compute_column_stats.h"
#include <algorithm>
#include <cmath>
#include <unordered_map>
#include <vector>
namespace data_table {
// compute_column_stats: single pass over the selected rows of column `col`.
//   cells      row-major matrix of C-strings (rows x cols); null/empty = missing
//   col        0-based column index; out of range returns a default ColStats
//   unique_cap max distinct values tracked (0 = unlimited)
//   indices    optional row subset; used only when non-null and n_indices > 0
// Result notes:
//   - total counts every requested row, including indices outside [0, rows)
//     that are skipped.
//   - min/max/sum/mean cover all finite numeric cells, even in mixed columns.
//   - p25/p50/p75 and hist are filled only when every non-empty cell is a
//     finite number.
//   - top_categories holds up to 8 values by descending count; ties are broken
//     by ascending value so the result is deterministic.
ColStats compute_column_stats(const char* const* cells, int rows, int cols,
                              int col, int unique_cap,
                              const int* indices, int n_indices)
{
    ColStats s;
    if (col < 0 || col >= cols) return s;

    const bool use_idx = (indices != nullptr && n_indices > 0);
    // Clamp: a negative row count must not become a huge reserve() request.
    const int n = std::max(0, use_idx ? n_indices : rows);
    s.total = n;

    std::unordered_map<std::string, int> counts;
    if (unique_cap > 0) counts.reserve(std::min(unique_cap, n));

    bool all_numeric = true;
    std::vector<double> nums;
    nums.reserve(n);

    for (int i = 0; i < n; ++i) {
        const int r = use_idx ? indices[i] : i;
        if (r < 0 || r >= rows) continue;
        // Index in size_t so r * cols cannot overflow int on large tables.
        const char* c = cells[static_cast<std::size_t>(r) * cols + col];
        if (!c || !*c) { s.empty_count++; continue; }

        double v;
        // strtod also accepts "nan"/"inf" (and overflows to inf). Those values
        // would poison min/max/sum and make the sort and histogram binning
        // below ill-defined, so such cells are treated as text.
        if (parse_number(c, v) && std::isfinite(v)) {
            if (s.numeric_count == 0) { s.min = v; s.max = v; }
            else {
                if (v < s.min) s.min = v;
                if (v > s.max) s.max = v;
            }
            s.sum += v;
            s.numeric_count++;
            nums.push_back(v);
        } else {
            all_numeric = false;
        }

        // Distinct-value tracking: once the cap is reached only already-known
        // values keep counting; a new value just raises the capped flag.
        if (unique_cap == 0 || (int)counts.size() < unique_cap) {
            counts[c]++;
        } else {
            auto it = counts.find(c);
            if (it != counts.end()) it->second++;
            else s.unique_capped = true;
        }
    }

    s.unique_count = (int)counts.size();
    s.numeric = all_numeric && s.numeric_count > 0;
    if (s.numeric_count > 0) s.mean = s.sum / s.numeric_count;

    // Top 8 categories by descending count (value ascending on ties, so the
    // order does not depend on hash-map iteration order).
    if (!counts.empty()) {
        std::vector<std::pair<std::string,int>> v(counts.begin(), counts.end());
        const int topN = std::min<int>(8, (int)v.size());
        std::partial_sort(v.begin(), v.begin() + topN, v.end(),
            [](const auto& a, const auto& b){
                if (a.second != b.second) return a.second > b.second;
                return a.first < b.first;
            });
        v.resize(topN);
        s.top_categories = std::move(v);
    }

    if (s.numeric && !nums.empty()) {
        std::sort(nums.begin(), nums.end());
        // Linear interpolation between the two closest ranks.
        auto pct = [&](double p) {
            double idx = p * (nums.size() - 1);
            size_t lo = (size_t)idx;
            size_t hi = std::min(lo + 1, nums.size() - 1);
            double t = idx - lo;
            return nums[lo] * (1.0 - t) + nums[hi] * t;
        };
        s.p25 = pct(0.25);
        s.p50 = pct(0.50);
        s.p75 = pct(0.75);

        // Equal-width histogram over [min, max]; each bin holds a raw count.
        s.hist.assign(HIST_BINS, 0.0f);
        const double range = s.max - s.min;
        if (range <= 0) {
            // Single distinct value: put everything in the central bin.
            s.hist[HIST_BINS / 2] = (float)nums.size();
        } else {
            for (double val : nums) {
                // Clamp in floating point before converting, so an overflowing
                // range (inf/NaN position) can never reach the int cast.
                const double pos = (val - s.min) / range * HIST_BINS;
                int b = 0;
                if (pos >= HIST_BINS) b = HIST_BINS - 1;
                else if (pos > 0) b = (int)pos;
                s.hist[b] += 1.0f;
            }
        }
    }
    return s;
}
} // namespace data_table
+43
View File
@@ -0,0 +1,43 @@
// compute_column_stats — estadisticas por columna (mean, p25/p50/p75, hist, top).
// Promovido del playground tables (issue 0081-F). Pura — sin I/O ni estado.
#pragma once
#include "core/auto_detect_type.h" // parse_number reutilizado en la impl
#include <string>
#include <utility>
#include <vector>
namespace data_table {
// ColStats: statistics computed for one table column.
// Product type (every field is always present; unused ones keep their defaults).
struct ColStats {
int total = 0; // rows requested (n_indices when an index list is passed, else rows)
int empty_count = 0; // null or empty cells
int unique_count = 0; // distinct values tracked (bounded by unique_cap)
bool unique_capped = false; // true if a new distinct value was seen after unique_cap was reached
bool numeric = false; // true if ALL non-empty cells are numeric (and there is at least one)
// Number of cells that parsed as numbers; min/max/sum/mean below are computed
// over those cells even when the column is mixed (numeric == false).
int numeric_count = 0;
double min = 0;
double max = 0;
double sum = 0;
double mean = 0;
// Percentiles (linear interpolation); only filled when numeric == true.
double p25 = 0;
double p50 = 0;
double p75 = 0;
std::vector<float> hist; // HIST_BINS equal-width bins holding raw counts (not normalized); empty unless numeric
std::vector<std::pair<std::string,int>> top_categories; // top-8 by descending frequency
};
// Number of histogram bins stored in ColStats::hist.
constexpr int HIST_BINS = 24;
// compute_column_stats: computes ColStats for column col of cells (row-major,
// rows x cols). If indices != nullptr and n_indices > 0, only the rows listed in
// indices[0..n_indices-1] are considered (useful for filtered views). unique_cap
// limits distinct-value tracking for high-cardinality columns (0 = no limit).
ColStats compute_column_stats(const char* const* cells, int rows, int cols,
int col, int unique_cap = 100000,
const int* indices = nullptr, int n_indices = 0);
} // namespace data_table
@@ -0,0 +1,75 @@
---
name: compute_column_stats
kind: function
lang: cpp
domain: core
version: "1.0.0"
purity: pure
signature: "ColStats compute_column_stats(const char* const* cells, int rows, int cols, int col, int unique_cap = 100000, const int* indices = nullptr, int n_indices = 0)"
description: "Calcula estadisticas completas para una columna: mean, p25/p50/p75, min/max, count de vacios, unicos, histograma de 24 bins y top-8 categorias por frecuencia."
tags: [tables, stats, statistics, histogram, percentile, tql, cpp-tables]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: ["core/compute_column_stats.h"]
tested: true
tests:
- "compute_column_stats: media correcta sobre vector numerico"
- "compute_column_stats: p50 es mediana"
- "compute_column_stats: p25 y p75 correctos"
- "compute_column_stats: conteo de vacios correcto"
- "compute_column_stats: columna texto no es numerica"
- "compute_column_stats: unique_count correcto"
- "compute_column_stats: top_categories ordena por frecuencia desc"
- "compute_column_stats: indices filtra filas correctamente"
- "compute_column_stats: histograma generado para numerica"
- "compute_column_stats: columna vacia retorna stats en cero"
- "compute_column_stats: col fuera de rango devuelve ColStats defecto"
test_file_path: "cpp/tests/test_compute_column_stats.cpp"
file_path: "cpp/functions/core/compute_column_stats.cpp"
params:
- name: cells
desc: "Matriz row-major de punteros a C-strings (rows x cols). nullptr indica celda nula."
- name: rows
desc: "Numero de filas de la matriz."
- name: cols
desc: "Numero de columnas de la matriz."
- name: col
desc: "Indice de la columna a analizar (0-based)."
- name: unique_cap
desc: "Maximo de valores unicos a trackear. Default 100000. Con 0 trackea todos."
- name: indices
desc: "Array opcional de indices de fila a incluir. Si nullptr se usan todas las filas."
- name: n_indices
desc: "Longitud del array indices. Ignorado si indices es nullptr."
output: "ColStats con total, empty_count, unique_count, numeric, numeric_count, min, max, sum, mean, p25, p50, p75, hist (HIST_BINS=24 floats) y top_categories (hasta 8 pares string+count)."
---
## Ejemplo
```cpp
#include "core/compute_column_stats.h"
// Columna: ["10", "20", "30", "40", "50"]
const char* data[] = {"10", "20", "30", "40", "50"};
data_table::ColStats s = data_table::compute_column_stats(data, 5, 1, 0);
// s.mean == 30.0, s.p50 == 30.0, s.p25 == 20.0, s.p75 == 40.0
// Con filtro de filas (solo filas 0, 2, 4 -> [10, 30, 50]):
int idx[] = {0, 2, 4};
s = data_table::compute_column_stats(data, 5, 1, 0, 100000, idx, 3);
// s.mean == 30.0, s.total == 3
```
## Cuando usarla
Cuando necesites mostrar un panel de estadisticas de columna (inspector, tooltip de header, sidebar de datos), calcular min/max para escalar un histograma, o detectar columnas con mucho missing antes de aplicar un filtro.
## Gotchas
- `numeric` es `true` solo si TODOS los valores no-vacios son numericos. Una sola celda texto hace que `numeric == false` y `hist` quede vacio.
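- Los percentiles (p25/p50/p75) usan interpolacion lineal (igual que numpy `percentile` con `method='linear'`). El histograma reparte los valores en `HIST_BINS` bins de igual ancho entre `min` y `max`; para distribuciones con un solo valor (`max == min`), el bin central recibe todo el count.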
- `unique_capped` se activa cuando hay mas valores distintos que `unique_cap`. En ese caso `unique_count == unique_cap` y puede no reflejar la cardinalidad real.
- Las `top_categories` incluyen valores numericos si la columna no es puramente numerica (mezcla). Si la columna es puramente numerica, `top_categories` lista los valores numericos como strings.
+39
View File
@@ -0,0 +1,39 @@
// compute_pipeline — Pure TQL pipeline execution.
// Chains compute_stage calls: output of stage N becomes input of stage N+1.
// Promoted from cpp/apps/primitives_gallery/playground/tables/ (issue 0081-B).
#include "core/compute_pipeline.h"
#include <vector>
#include <string>
namespace data_table {
// compute_pipeline: runs stages[0..N-1] in order, feeding each stage the
// StageOutput of the previous one, and returns the output of the last stage.
// With no stages it returns compute_stage() applied with a default Stage.
//
// Lifetime handling: a stage's output cells are raw pointers that may alias its
// INPUT cells (passthrough rows and breakout values are copied by pointer).
// In a chain, that input is the previous stage's output, whose strings can
// live in that intermediate's cell_backing. Every intermediate is therefore
// kept alive until the final result has been made self-contained; destroying
// one earlier would leave the result with dangling pointers.
StageOutput compute_pipeline(const char* const* in_cells, int in_rows, int in_cols,
                             const std::vector<std::string>& in_headers,
                             const std::vector<ColumnType>& in_types,
                             const std::vector<Stage>& stages)
{
    if (stages.empty()) {
        // Passthrough: wrap raw input.
        Stage empty_stage;
        return compute_stage(in_cells, in_rows, in_cols, in_headers, in_types, empty_stage);
    }

    // reserve() up front: the vector never reallocates, so references to
    // earlier outputs stay valid while later stages run.
    std::vector<StageOutput> chain;
    chain.reserve(stages.size());

    // First stage consumes the raw input.
    chain.push_back(compute_stage(in_cells, in_rows, in_cols, in_headers, in_types, stages[0]));

    // Each remaining stage consumes the previous StageOutput.
    for (size_t i = 1; i < stages.size(); ++i) {
        const StageOutput& prev = chain.back();
        StageOutput next = compute_stage(
            prev.cells.empty() ? nullptr : prev.cells.data(),
            prev.rows,
            prev.cols,
            prev.headers,
            prev.types,
            stages[i]);
        chain.push_back(std::move(next));
    }

    StageOutput result = std::move(chain.back());
    chain.pop_back();

    // If no intermediate owns any strings, every result cell points either to
    // the caller's input or to the result's own backing: nothing to fix up.
    bool intermediates_own_text = false;
    for (const StageOutput& mid : chain) {
        if (mid.cell_backing.size() != 0) { intermediates_own_text = true; break; }
    }

    if (intermediates_own_text) {
        // Re-home every cell into a fresh backing owned by the result. Reserving
        // the exact size first keeps the c_str() pointers stable while filling.
        // The old backing is replaced only after all cells have been copied,
        // because some of them may still point into it.
        decltype(result.cell_backing) owned;
        owned.reserve(result.cells.size());
        for (auto& cell : result.cells) {
            if (cell == nullptr) continue;  // null cells stay null
            owned.emplace_back(cell);
            cell = owned.back().c_str();
        }
        result.cell_backing = std::move(owned);
    }

    return result;  // intermediates in `chain` are destroyed here, after the fix-up
}
} // namespace data_table
+21
View File
@@ -0,0 +1,21 @@
// compute_pipeline — Pure TQL pipeline: chains N stages to produce a final StageOutput.
// Part of issue 0081-B: promoted from primitives_gallery playground to registry.
// Depends on compute_stage.h. No ImGui, no I/O.
#pragma once
#include "core/compute_stage.h"
#include "core/data_table_types.h"
#include <string>
#include <vector>
namespace data_table {
// compute_pipeline — Pure: applies stages[0..N-1] in sequence.
// Each stage receives the StageOutput of the previous one as input.
// If stages is empty, returns a passthrough StageOutput wrapping in_cells.
// Lifetime: the returned cells are raw const char* pointers. Passthrough stages
// copy the pointers of their input cells, so the strings behind in_cells must
// outlive the returned StageOutput.
StageOutput compute_pipeline(const char* const* in_cells, int in_rows, int in_cols,
const std::vector<std::string>& in_headers,
const std::vector<ColumnType>& in_types,
const std::vector<Stage>& stages);
} // namespace data_table
+81
View File
@@ -0,0 +1,81 @@
---
name: compute_pipeline
kind: function
lang: cpp
domain: core
version: "1.0.0"
purity: pure
signature: "data_table::StageOutput data_table::compute_pipeline(const char* const* in_cells, int in_rows, int in_cols, const std::vector<std::string>& in_headers, const std::vector<ColumnType>& in_types, const std::vector<Stage>& stages)"
description: "Chains N TQL stages sequentially: output of stage i becomes input of stage i+1. Returns the final StageOutput. Empty stages returns a passthrough. No ImGui, no I/O."
tags: [tables, pipeline, tql, chain, pure, cpp-tables]
uses_functions: [compute_stage_cpp_core]
uses_types: [data_table_types_cpp_core]
returns: []
returns_optional: false
error_type: ""
imports: []
tested: true
tests:
- "compute_pipeline empty stages returns passthrough"
- "compute_pipeline single stage equals compute_stage"
- "compute_pipeline two stages chain filter then group"
- "compute_pipeline three stage chain"
- "compute_pipeline empty table"
test_file_path: "cpp/tests/test_compute_pipeline.cpp"
file_path: "cpp/functions/core/compute_pipeline.cpp"
params:
- name: in_cells
desc: "Row-major cell array (in_rows * in_cols pointers). May be null when in_rows == 0."
- name: in_rows
desc: "Number of input rows."
- name: in_cols
desc: "Number of columns per row. Must match in_headers.size()."
- name: in_headers
desc: "Column names for the raw input. Passed as-is to stage 0."
- name: in_types
desc: "Declared ColumnType per column for the raw input."
- name: stages
desc: "Ordered list of Stage descriptors. Each stage runs on the output of the previous one. Empty = passthrough."
output: "StageOutput produced by the last stage. Owns its cell_backing. If stages is empty, returns a no-op passthrough of in_cells."
---
## Ejemplo
```cpp
#include "core/compute_pipeline.h"
using namespace data_table;
const char* raw[] = {
"EU","A","100", "US","A","200",
"EU","B","300", "EU","A","50"
};
std::vector<std::string> hdrs = {"region","type","revenue"};
std::vector<ColumnType> types = {ColumnType::String, ColumnType::String, ColumnType::Float};
// Stage 0: keep EU rows only
Stage s0;
Filter f; f.col=0; f.op=Op::Eq; f.value="EU"; s0.filters.push_back(f);
// Stage 1: group by type, sum revenue, sort desc
Stage s1;
s1.breakouts = {"type"};
Aggregation agg; agg.fn=AggFn::Sum; agg.col="revenue"; s1.aggregations.push_back(agg);
SortClause sc; sc.col="sum_revenue"; sc.desc=true; s1.sorts.push_back(sc);
StageOutput out = compute_pipeline(raw, 4, 3, hdrs, types, {s0, s1});
// out.rows == 2 (B=300, A=150), sorted desc
// out.cells[0]=="B", out.cells[1]=="300"
// out.cells[2]=="A", out.cells[3]=="150"
```
## Cuando usarla
- Cuando tienes una secuencia de transformaciones TQL (filter -> group -> filter -> sort) y quieres ejecutarlas todas en un solo call.
- Para implementar el "active pipeline" de un State con multiples stages: `compute_pipeline(raw, rows, cols, hdrs, types, state.stages)`.
- En tests de integracion de pipelines multi-stage sin renderizar nada.
## Gotchas
- Cada stage crea un `StageOutput` intermedio con su propio `cell_backing`. Los intermedios se destruyen al final de la llamada — solo el resultado final sobrevive.
- Los punteros de `out.cells` pueden apuntar al `cell_backing` del propio resultado o directamente a las cadenas de `in_cells`: los stages passthrough copian los punteros de su entrada, y los stages agrupados reutilizan el puntero de entrada para los valores de breakout sin granularidad. Por eso el caller debe mantener vivos tanto el `StageOutput` devuelto como las cadenas de `in_cells` mientras use el resultado.
- Si `stages` es vacio, se comporta igual que `compute_stage` con un `Stage{}` vacio (passthrough, copia punteros de `in_cells` sin copiar datos).
+525
View File
@@ -0,0 +1,525 @@
// compute_stage — Pure TQL stage execution.
// Promoted from cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp
// (issue 0081-B). No ImGui, no I/O. Helper statics are file-private.
#include "core/compute_stage.h"
#include "core/tql_helpers.h" // aggregation_alias, agg_fn_token (avoids ODR dup)
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace data_table {
// Returns the ColumnType of the output column produced by aggregation `a`:
//   Count / Distinct -> Int
//   Min / Max        -> type of the source column a.col (String if the column
//                       is not found or has no declared type)
//   everything else  -> Float
ColumnType aggregation_type(const Aggregation& a,
                            const std::vector<std::string>& in_headers,
                            const std::vector<ColumnType>& in_types)
{
    switch (a.fn) {
        case AggFn::Count:
        case AggFn::Distinct:
            return ColumnType::Int;

        case AggFn::Min:
        case AggFn::Max: {
            // Min/Max keep the type of the column they are taken from.
            for (size_t idx = 0; idx < in_headers.size() && idx < in_types.size(); ++idx) {
                if (in_headers[idx] == a.col) return in_types[idx];
            }
            return ColumnType::String;
        }

        default:
            return ColumnType::Float;
    }
}
std::vector<int> apply_filters(const char* const* cells, int rows, int cols,
const std::vector<Filter>& filters)
{
std::vector<int> out;
out.reserve(rows);
for (int r = 0; r < rows; ++r) {
bool keep = true;
for (const auto& f : filters) {
if (f.col < 0 || f.col >= cols) continue;
const char* cell = cells[r * cols + f.col];
// compare inline: numeric if both parseable, lexical otherwise
auto do_compare = [](const char* a, const char* b, Op op) -> bool {
if (!a) a = "";
if (!b) b = "";
switch (op) {
case Op::Contains: return std::strstr(a, b) != nullptr;
case Op::NotContains: return std::strstr(a, b) == nullptr;
case Op::StartsWith: {
size_t lb = std::strlen(b);
return std::strncmp(a, b, lb) == 0;
}
case Op::EndsWith: {
size_t la = std::strlen(a), lb = std::strlen(b);
return lb <= la && std::strcmp(a + la - lb, b) == 0;
}
default: break;
}
double na, nb;
char* ea = nullptr; char* eb = nullptr;
na = std::strtod(a, &ea); nb = std::strtod(b, &eb);
bool numeric = (ea != a && *ea == '\0') && (eb != b && *eb == '\0');
if (numeric) {
switch (op) {
case Op::Eq: return na == nb;
case Op::Neq: return na != nb;
case Op::Gt: return na > nb;
case Op::Gte: return na >= nb;
case Op::Lt: return na < nb;
case Op::Lte: return na <= nb;
default: break;
}
}
int c = std::strcmp(a, b);
switch (op) {
case Op::Eq: return c == 0;
case Op::Neq: return c != 0;
case Op::Gt: return c > 0;
case Op::Gte: return c >= 0;
case Op::Lt: return c < 0;
case Op::Lte: return c <= 0;
default: break;
}
return false;
};
if (!do_compare(cell, f.value.c_str(), f.op)) { keep = false; break; }
}
if (keep) out.push_back(r);
}
return out;
}
// ----------------------------------------------------------------------------
// File-private helpers (static)
// ----------------------------------------------------------------------------
// File-private numeric parser: strtod plus a "whole string consumed" check.
// Accepts trailing spaces/tabs after the number. Writes to out only on success;
// returns false for null, empty or non-numeric text.
static bool parse_num(const char* s, double& out) {
    if (s == nullptr) return false;
    if (s[0] == '\0') return false;

    char* rest = nullptr;
    const double value = std::strtod(s, &rest);
    const bool consumed_something = (rest != s);
    if (!consumed_something) return false;

    while (*rest == ' ' || *rest == '\t') ++rest;
    if (*rest == '\0') {
        out = value;
        return true;
    }
    return false;
}
namespace {
// Returns the index of the first header equal to `name`, or -1 if none matches.
int find_col(const std::vector<std::string>& headers, const std::string& name) {
    const auto hit = std::find(headers.begin(), headers.end(), name);
    if (hit == headers.end()) return -1;
    return (int)(hit - headers.begin());
}
// Three-way comparison of two cells. Null is treated as "". When both cells
// parse as numbers the result is -1/0/1 by numeric value; otherwise it is the
// strcmp result of the raw text.
int cmp_cells(const char* a, const char* b) {
    const char* lhs = a ? a : "";
    const char* rhs = b ? b : "";

    double lhs_num, rhs_num;
    if (parse_num(lhs, lhs_num) && parse_num(rhs, rhs_num)) {
        if (lhs_num < rhs_num) return -1;
        if (lhs_num > rhs_num) return 1;
        return 0;
    }
    return std::strcmp(lhs, rhs);
}
void apply_sorts(std::vector<int>& row_idx,
const char* const* cells, int cols,
const std::vector<std::string>& headers,
const std::vector<SortClause>& sorts)
{
if (sorts.empty()) return;
std::vector<int> sort_cols(sorts.size());
for (size_t i = 0; i < sorts.size(); ++i) sort_cols[i] = find_col(headers, sorts[i].col);
std::sort(row_idx.begin(), row_idx.end(), [&](int a, int b){
for (size_t i = 0; i < sorts.size(); ++i) {
int sc = sort_cols[i];
if (sc < 0) continue;
int c = cmp_cells(cells[a * cols + sc], cells[b * cols + sc]);
if (c != 0) return sorts[i].desc ? (c > 0) : (c < 0);
}
return false;
});
}
// Returns the p-quantile of v using linear interpolation between the two
// closest ranks. Sorts v in place as a side effect.
//   v  sample values; an empty vector yields 0.0
//   p  quantile as a fraction in [0, 1]. Out-of-range values are clamped
//      (p < 0 or NaN -> minimum, p > 1 -> maximum) so a caller-supplied
//      percentile argument can never index outside the vector.
double percentile_value(std::vector<double>& v, double p) {
    if (v.empty()) return 0.0;
    std::sort(v.begin(), v.end());

    // Written as !(p > 0) so that NaN also falls into the lower clamp.
    if (!(p > 0.0)) p = 0.0;
    else if (p > 1.0) p = 1.0;

    const double idx = p * (v.size() - 1);          // fractional rank
    const size_t lo = (size_t)idx;
    const size_t hi = std::min(lo + 1, v.size() - 1);
    const double t = idx - lo;                      // weight of the upper neighbour
    return v[lo] * (1.0 - t) + v[hi] * t;
}
// Reduces vals to a single number according to fn.
//   vals  numeric samples; empty input yields 0.0. May be sorted in place by
//         the percentile-based functions.
//   arg   quantile fraction, used only by AggFn::Percentile
// Stddev divides by the sample count (not count - 1). Unknown functions
// return 0.0.
double compute_agg_numeric(AggFn fn, std::vector<double>& vals, double arg) {
    if (vals.empty()) return 0.0;

    // Left-to-right running sum shared by Sum, Avg and Stddev.
    const auto total = [&vals]() {
        double acc = 0;
        for (double x : vals) acc += x;
        return acc;
    };
    const double count = vals.size();

    switch (fn) {
        case AggFn::Sum:
            return total();
        case AggFn::Avg:
            return total() / count;
        case AggFn::Min:
            return *std::min_element(vals.begin(), vals.end());
        case AggFn::Max:
            return *std::max_element(vals.begin(), vals.end());
        case AggFn::Stddev: {
            const double mean = total() / count;
            double sq_dev = 0;
            for (double x : vals) {
                const double d = x - mean;
                sq_dev += d * d;
            }
            return std::sqrt(sq_dev / count);
        }
        case AggFn::Median:     return percentile_value(vals, 0.50);
        case AggFn::P25:        return percentile_value(vals, 0.25);
        case AggFn::P75:        return percentile_value(vals, 0.75);
        case AggFn::P90:        return percentile_value(vals, 0.90);
        case AggFn::P99:        return percentile_value(vals, 0.99);
        case AggFn::Percentile: return percentile_value(vals, arg);
        default:
            return 0.0;
    }
}
// Formats v for display in a cell: whole numbers print as plain integers
// ("300"), everything else with four significant digits ("%.4g").
// The integer path is taken only when v lies inside the long long range;
// NaN, +-inf and huge magnitudes go straight to "%.4g", because converting
// them to long long would be undefined behaviour.
std::string format_double(double v) {
    char buf[64];
    // 2^63 as a double: values in [-2^63, 2^63) convert to long long safely.
    constexpr double kLongLongLimit = 9223372036854775808.0;
    // Both comparisons are false for NaN, so NaN is never cast.
    const bool fits_long_long = (v >= -kLongLongLimit && v < kLongLongLimit);

    if (fits_long_long && (double)(long long)v == v) {
        std::snprintf(buf, sizeof(buf), "%lld", (long long)v);
    } else {
        std::snprintf(buf, sizeof(buf), "%.4g", v);
    }
    return buf;
}
// parse_breakout_granularity_local: splits a breakout spec "column:unit" into
// its column name and DateGranularity (type declared in data_table_types.h).
// Recognised units after the LAST ':' are year, month, week, day and hour.
// If there is no ':' or the suffix is not a recognised unit, the whole string
// is treated as the column name and DateGranularity::None is returned.
DateGranularity parse_breakout_granularity_local(const std::string& breakout,
                                                 std::string& col_out)
{
    DateGranularity gran = DateGranularity::None;

    const auto colon = breakout.rfind(':');
    if (colon != std::string::npos) {
        const std::string unit = breakout.substr(colon + 1);
        if (unit == "year")       gran = DateGranularity::Year;
        else if (unit == "month") gran = DateGranularity::Month;
        else if (unit == "week")  gran = DateGranularity::Week;
        else if (unit == "day")   gran = DateGranularity::Day;
        else if (unit == "hour")  gran = DateGranularity::Hour;
    }

    if (gran == DateGranularity::None) {
        col_out = breakout;  // no (valid) suffix: keep the name untouched
    } else {
        col_out = breakout.substr(0, colon);
    }
    return gran;
}
// truncate_date: truncate ISO date string to given granularity.
// Output per granularity: Year "YYYY", Month "YYYY-MM", Day "YYYY-MM-DD",
// Hour "YYYY-MM-DDTHH", Week the "YYYY-MM-DD" of the Monday on or before the date.
// The input is returned unchanged when the granularity is None, when the text
// does not start with a well-formed YYYY-MM-DD, or when month/day fall outside
// 1..12 / 1..31 (day-of-month is not validated against the actual month length).
std::string truncate_date_local(const std::string& date, DateGranularity g) {
if (g == DateGranularity::None) return date;
if (date.size() < 10) return date;
// Parse YYYY-MM-DD
auto d = [](char c){ return c >= '0' && c <= '9'; };
if (!d(date[0])||!d(date[1])||!d(date[2])||!d(date[3])) return date;
if (date[4] != '-') return date;
if (!d(date[5])||!d(date[6])) return date;
if (date[7] != '-') return date;
if (!d(date[8])||!d(date[9])) return date;
int y = (date[0]-'0')*1000+(date[1]-'0')*100+(date[2]-'0')*10+(date[3]-'0');
int m = (date[5]-'0')*10+(date[6]-'0');
int dd_= (date[8]-'0')*10+(date[9]-'0');
if (m < 1 || m > 12 || dd_ < 1 || dd_ > 31) return date;
char buf[32];
switch (g) {
case DateGranularity::Year:
std::snprintf(buf, sizeof(buf), "%04d", y);
return buf;
case DateGranularity::Month:
std::snprintf(buf, sizeof(buf), "%04d-%02d", y, m);
return buf;
case DateGranularity::Day:
std::snprintf(buf, sizeof(buf), "%04d-%02d-%02d", y, m, dd_);
return buf;
case DateGranularity::Hour: {
// The hour is read only from a 'T'-separated time part ("...THH");
// without one, or with an hour above 23, the bucket falls back to hour 00.
int hh = 0;
if (date.size() >= 13 && date[10] == 'T'
&& d(date[11]) && d(date[12])) {
hh = (date[11]-'0')*10 + (date[12]-'0');
if (hh < 0 || hh > 23) hh = 0;
}
std::snprintf(buf, sizeof(buf), "%04d-%02d-%02dT%02d", y, m, dd_, hh);
return buf;
}
case DateGranularity::Week: {
// Hinnant civil calendar: ymd -> days since 0000-03-01.
// days%7: 0=Wed. Monday offset: ((days%7 - 5 + 7) % 7).
// Years are shifted to start in March so the leap day is the last day of
// the shifted year; "era" is a 400-year block of 146097 days.
auto ymd_to_days = [](int yy, int mm, int dd2) -> long {
if (mm <= 2) { yy -= 1; mm += 12; }
long era = (yy >= 0 ? yy : yy - 399) / 400;
unsigned yoe = (unsigned)(yy - era * 400);
unsigned doy = (unsigned)((153*(mm-3)+2)/5 + dd2 - 1);
unsigned doe = yoe*365 + yoe/4 - yoe/100 + doy;
return era * 146097 + (long)doe;
};
// Inverse of ymd_to_days: day count -> civil year/month/day.
auto days_to_ymd = [](long days, int& yy, int& mm, int& dd2) {
long era = (days >= 0 ? days : days - 146096) / 146097;
unsigned doe = (unsigned)(days - era * 146097);
unsigned yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365;
int yr = (int)yoe + (int)era * 400;
unsigned doy = doe - (365*yoe + yoe/4 - yoe/100);
unsigned mp = (5*doy + 2)/153;
unsigned day2 = doy - (153*mp+2)/5 + 1;
unsigned mon = mp < 10 ? mp+3 : mp-9;
if (mon <= 2) yr += 1;
yy = yr; mm = (int)mon; dd2 = (int)day2;
};
long days = ymd_to_days(y, m, dd_);
// mod: weekday index with 0=Wed ... 5=Mon; rem: days elapsed since Monday.
int mod = (int)(((days % 7) + 7) % 7);
int rem = ((mod - 5) % 7 + 7) % 7;
long monday = days - rem;
int yy2, mm2, dd2;
days_to_ymd(monday, yy2, mm2, dd2);
std::snprintf(buf, sizeof(buf), "%04d-%02d-%02d", yy2, mm2, dd2);
return buf;
}
default: return date;
}
}
} // anon namespace
// ----------------------------------------------------------------------------
// compute_stage (public)
// ----------------------------------------------------------------------------
StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
const std::vector<std::string>& in_headers,
const std::vector<ColumnType>& in_types,
const Stage& stage)
{
StageOutput out;
auto visible = apply_filters(in_cells, in_rows, in_cols, stage.filters);
bool grouped = !stage.breakouts.empty() || !stage.aggregations.empty();
if (!grouped) {
// Passthrough: same shape, filtered + sorted.
out.cols = in_cols;
out.headers = in_headers;
out.types = in_types;
apply_sorts(visible, in_cells, in_cols, in_headers, stage.sorts);
out.rows = (int)visible.size();
out.cells.reserve((size_t)out.rows * in_cols);
for (int r : visible) {
for (int c = 0; c < in_cols; ++c) out.cells.push_back(in_cells[r * in_cols + c]);
}
return out;
}
// Grouped: group visible rows by breakout values, compute aggregations.
int nbreaks = (int)stage.breakouts.size();
std::vector<int> break_cols(nbreaks);
std::vector<DateGranularity> break_grans(nbreaks);
bool any_trunc = false;
for (int i = 0; i < nbreaks; ++i) {
std::string col_name;
break_grans[i] = parse_breakout_granularity_local(stage.breakouts[i], col_name);
if (break_grans[i] != DateGranularity::None) any_trunc = true;
break_cols[i] = find_col(in_headers, col_name);
}
// Pre-allocate cell_backing to avoid pointer invalidation on push_back.
out.cell_backing.reserve(
(size_t)in_rows * (size_t)nbreaks +
(size_t)in_rows * stage.aggregations.size() + 16);
std::vector<const char*> trunc_ptrs;
if (any_trunc) {
trunc_ptrs.assign((size_t)in_rows * (size_t)nbreaks, nullptr);
for (int r = 0; r < in_rows; ++r) {
for (int i = 0; i < nbreaks; ++i) {
if (break_grans[i] == DateGranularity::None) continue;
int bc = break_cols[i];
if (bc < 0) continue;
const char* v = in_cells[r * in_cols + bc];
out.cell_backing.emplace_back(
truncate_date_local(v ? v : "", break_grans[i]));
trunc_ptrs[(size_t)r * nbreaks + i] = out.cell_backing.back().c_str();
}
}
}
auto cell_for = [&](int r, int i) -> const char* {
int bc = break_cols[i];
if (bc < 0) return "";
if (break_grans[i] != DateGranularity::None) {
return trunc_ptrs[(size_t)r * nbreaks + i];
}
const char* v = in_cells[r * in_cols + bc];
return v ? v : "";
};
auto make_key = [&](int r) -> std::string {
std::string k;
for (int i = 0; i < nbreaks; ++i) {
if (i > 0) k += '\x1f';
k += cell_for(r, i);
}
return k;
};
std::unordered_map<std::string, int> key_to_group;
std::vector<std::string> group_keys;
std::vector<std::vector<int>> group_rows;
std::vector<std::vector<const char*>> group_breakvals;
for (int r : visible) {
std::string k = make_key(r);
auto it = key_to_group.find(k);
int gi;
if (it == key_to_group.end()) {
gi = (int)group_rows.size();
key_to_group.emplace(k, gi);
group_keys.push_back(k);
group_rows.emplace_back();
std::vector<const char*> bv((size_t)nbreaks, "");
for (int i = 0; i < nbreaks; ++i) bv[i] = cell_for(r, i);
group_breakvals.push_back(std::move(bv));
} else gi = it->second;
group_rows[gi].push_back(r);
}
// Build output headers + types: breakouts + aggregation aliases.
int out_cols = (int)stage.breakouts.size() + (int)stage.aggregations.size();
out.cols = out_cols;
out.headers.reserve(out_cols);
out.types.reserve(out_cols);
for (int i = 0; i < nbreaks; ++i) {
out.headers.push_back(stage.breakouts[i]);
int bc = break_cols[i];
ColumnType ot = ColumnType::String;
if (break_grans[i] == DateGranularity::None
&& bc >= 0 && bc < (int)in_types.size()) {
ot = in_types[bc];
}
out.types.push_back(ot);
}
for (const auto& a : stage.aggregations) {
out.headers.push_back(aggregation_alias(a));
out.types.push_back(aggregation_type(a, in_headers, in_types));
}
// Compute aggregation per group.
int n_groups = (int)group_rows.size();
// Reserve exact size to prevent pointer invalidation.
out.cell_backing.reserve(out.cell_backing.size() + (size_t)n_groups * stage.aggregations.size() + 16);
auto store_backing = [&](const std::string& s) -> const char* {
out.cell_backing.push_back(s);
return out.cell_backing.back().c_str();
};
std::vector<const char*> flat;
flat.reserve((size_t)n_groups * out_cols);
for (int gi = 0; gi < n_groups; ++gi) {
for (size_t i = 0; i < stage.breakouts.size(); ++i) {
flat.push_back(group_breakvals[gi][i]);
}
for (const auto& a : stage.aggregations) {
if (a.fn == AggFn::Count) {
flat.push_back(store_backing(format_double((double)group_rows[gi].size())));
continue;
}
if (a.fn == AggFn::Distinct) {
int ac = find_col(in_headers, a.col);
if (ac < 0) { flat.push_back(store_backing("0")); continue; }
std::unordered_set<std::string> uniq;
for (int r : group_rows[gi]) {
const char* v = in_cells[r * in_cols + ac];
if (v && *v) uniq.insert(v);
}
flat.push_back(store_backing(format_double((double)uniq.size())));
continue;
}
int ac = find_col(in_headers, a.col);
if (ac < 0) { flat.push_back(store_backing("")); continue; }
// min/max over strings: preserve type
if ((a.fn == AggFn::Min || a.fn == AggFn::Max) &&
ac < (int)in_types.size() &&
(in_types[ac] == ColumnType::String || in_types[ac] == ColumnType::Date))
{
const char* best = nullptr;
for (int r : group_rows[gi]) {
const char* v = in_cells[r * in_cols + ac];
if (!v || !*v) continue;
if (!best) { best = v; continue; }
int c = std::strcmp(v, best);
if ((a.fn == AggFn::Min && c < 0) || (a.fn == AggFn::Max && c > 0)) best = v;
}
flat.push_back(best ? best : store_backing(""));
continue;
}
std::vector<double> vals;
vals.reserve(group_rows[gi].size());
for (int r : group_rows[gi]) {
const char* v = in_cells[r * in_cols + ac];
if (!v || !*v) continue;
double d;
if (parse_num(v, d)) vals.push_back(d);
}
double agg_val = compute_agg_numeric(a.fn, vals, a.arg);
flat.push_back(store_backing(format_double(agg_val)));
}
}
// Sort groups by stage.sorts (col-name lookup in out.headers).
std::vector<int> grp_idx(n_groups);
for (int i = 0; i < n_groups; ++i) grp_idx[i] = i;
apply_sorts(grp_idx, flat.data(), out_cols, out.headers, stage.sorts);
out.rows = n_groups;
out.cells.reserve((size_t)n_groups * out_cols);
for (int gi : grp_idx) {
for (int c = 0; c < out_cols; ++c) {
out.cells.push_back(flat[gi * out_cols + c]);
}
}
return out;
}
// ---------------------------------------------------------------------------
// State method implementations (declared in data_table_types.h).
// Placed here as the header documents "defined in compute_stage.cpp".
// Promoted from playground data_table_logic.cpp — issue 0081-I Wave 3.5.
// ---------------------------------------------------------------------------
// Guarantees that `stages` holds at least one Stage and that `active_stage`
// is a valid index into it (clamped to [0, stages.size() - 1]).
void State::ensure_stage0() {
    if (stages.empty()) {
        stages.push_back(Stage{});
    }
    const int last_index = (int)stages.size() - 1;
    if (active_stage < 0) {
        active_stage = 0;
    } else if (active_stage > last_index) {
        active_stage = last_index;
    }
}
// Mutable access to stage 0. Creates the stage first if `stages` is empty
// (ensure_stage0 may also clamp active_stage as a side effect).
Stage& State::raw() {
    ensure_stage0();
    Stage& first_stage = stages[0];
    return first_stage;
}
// Read-only access to stage 0. Never mutates the State: when `stages` is empty
// a per-thread default-constructed Stage is returned instead.
const Stage& State::raw() const {
    static thread_local Stage empty;
    const bool has_stage0 = !stages.empty();
    if (has_stage0) {
        return stages[0];
    }
    return empty;
}
// Mutable access to the currently active stage. ensure_stage0() runs first, so
// the index used below is always within bounds.
Stage& State::active() {
    ensure_stage0();
    Stage& current = stages[active_stage];
    return current;
}
// Read-only access to the active stage without mutating the State.
// Returns a per-thread default-constructed Stage when there are no stages, and
// falls back to stage 0 when active_stage is out of range.
const Stage& State::active_const() const {
    static thread_local Stage empty;
    if (stages.empty()) {
        return empty;
    }
    const bool in_range = active_stage >= 0 && active_stage < (int)stages.size();
    return stages[in_range ? active_stage : 0];
}
} // namespace data_table
+39
View File
@@ -0,0 +1,39 @@
// compute_stage — Pure TQL stage execution (filter → group+agg|passthrough → sort).
// Part of issue 0081-B: promoted from primitives_gallery playground to registry.
// No ImGui, no I/O. Depends only on data_table_types.h.
#pragma once
#include "core/data_table_types.h"
#include <string>
#include <vector>
namespace data_table {
// ----------------------------------------------------------------------------
// aggregation_alias — Pure: default alias when agg.alias is empty.
// count -> "count"
// distinct col -> "distinct_<col>"
// percentile p -> "p<arg*100>_<col>" (e.g. p95_size_kb)
// other -> "<fn>_<col>" (e.g. avg_size_kb)
// ----------------------------------------------------------------------------
std::string aggregation_alias(const Aggregation& a);
// aggregation_type — Pure: output ColumnType of an aggregation.
ColumnType aggregation_type(const Aggregation& a,
const std::vector<std::string>& in_headers,
const std::vector<ColumnType>& in_types);
// apply_filters — Pure: returns indices of rows passing all filters.
// Uses compare() for each cell vs filter value/op.
std::vector<int> apply_filters(const char* const* cells, int rows, int cols,
const std::vector<Filter>& filters);
// compute_stage — Pure: executes one Stage over in_cells.
// Pipeline: apply_filters -> (group+agg | passthrough) -> sort.
// Returns a StageOutput owning its cell_backing and cells pointer list.
StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
const std::vector<std::string>& in_headers,
const std::vector<ColumnType>& in_types,
const Stage& stage);
} // namespace data_table
+80
View File
@@ -0,0 +1,80 @@
---
name: compute_stage
kind: function
lang: cpp
domain: core
version: "1.1.0"
purity: pure
signature: "data_table::StageOutput data_table::compute_stage(const char* const* in_cells, int in_rows, int in_cols, const std::vector<std::string>& in_headers, const std::vector<ColumnType>& in_types, const Stage& stage)"
description: "Executes one TQL Stage over a cell matrix: apply_filters -> (group+agg | passthrough) -> sort. Returns a StageOutput owning its cell backing. No ImGui, no I/O."
tags: [tables, pipeline, tql, filter, aggregation, sort, pure, cpp-tables]
uses_functions: [tql_helpers_cpp_core]
uses_types: [data_table_types_cpp_core]
returns: []
returns_optional: false
error_type: ""
imports:
- "core/tql_helpers.h"
tested: true
tests:
- "compute_stage passthrough returns all rows"
- "compute_stage filter eq keeps matching rows"
- "compute_stage group sum aggregates correctly"
- "compute_stage sort desc reorders rows"
- "compute_stage filter then group then sort asc"
- "apply_filters returns correct row indices"
- "aggregation_alias produces expected names"
- "compute_stage empty table empty stage"
test_file_path: "cpp/tests/test_compute_stage.cpp"
file_path: "cpp/functions/core/compute_stage.cpp"
params:
- name: in_cells
desc: "Row-major cell array (in_rows * in_cols pointers). May be null when in_rows == 0."
- name: in_rows
desc: "Number of input rows. 0 returns empty StageOutput."
- name: in_cols
desc: "Number of columns per row. Must match in_headers.size()."
- name: in_headers
desc: "Column names for in_cells. Used for breakout/sort/agg column lookup by name."
- name: in_types
desc: "Declared ColumnType per column. Used for aggregation output type and date truncation."
- name: stage
desc: "Stage descriptor: filters (row predicates), breakouts+aggregations (group-by), sorts (output order)."
output: "StageOutput with rows/cols/headers/types and cells pointer list into cell_backing. cell_backing owns all newly allocated strings (aggregation results, truncated dates). cells pointing to in_cells are not copied."
---
## Ejemplo
```cpp
#include "core/compute_stage.h"
using namespace data_table;
// 3 rows x 2 cols
const char* raw[] = {"eng","90000", "mktg","70000", "eng","95000"};
std::vector<std::string> hdrs = {"dept","salary"};
std::vector<ColumnType> types = {ColumnType::String, ColumnType::Int};
// Stage: filter dept=eng, group by dept, sum salary, sort desc
Stage s;
Filter f; f.col=0; f.op=Op::Eq; f.value="eng"; s.filters.push_back(f);
s.breakouts = {"dept"};
Aggregation agg; agg.fn=AggFn::Sum; agg.col="salary"; s.aggregations.push_back(agg);
SortClause sc; sc.col="sum_salary"; sc.desc=true; s.sorts.push_back(sc);
StageOutput out = compute_stage(raw, 3, 2, hdrs, types, s);
// out.rows == 1, out.headers == {"dept","sum_salary"}
// out.cells[0] == "eng", out.cells[1] == "185000"
```
## Cuando usarla
- Cuando necesites aplicar filtros, agrupaciones y ordenacion sobre datos tabulares raw (cell array) en un solo paso.
- Como bloque de construccion de `compute_pipeline` para procesar stage por stage.
- En tests headless de logica TQL sin renderizar nada.
## Gotchas
- `cell_backing` crece con `emplace_back`; la reserva previa evita reallocs durante grupos. No guardes punteros a `cell_backing` antes de llamar a la funcion — usa `out.cells` del resultado.
- Las celdas de passthrough apuntan directamente a `in_cells` (sin copiar). El caller debe mantener `in_cells` vivo mientras use `StageOutput`.
- Breakouts con sufijo `:granularity` (`:year`, `:month`, `:week`, `:day`, `:hour`) truncan fechas ISO. Breakouts sin sufijo usan el valor raw.
- `apply_filters` y `aggregation_alias` son funciones publicas del mismo header — usables independientemente.
+19 -1
View File
@@ -71,6 +71,24 @@ void load_fonts_from_settings() {
const AppSettings& s = settings();
const float size_px = s.font_size_px;
// Extended glyph range for the text font. ImGui's default is Latin-1 only,
// which leaves bullets/dots/arrows/checkmarks as `?`. Adds:
// General Punctuation (em-dash, ellipsis, smart quotes)
// Geometric Shapes (● ▲ ■ ▼ ▶ ◀ — used by data_table Dots renderer)
// Miscellaneous Symbols (★ ☆ ☑ ☒ ♦)
// Dingbats (✓ ✔ ✗ ✘ ✱ ❤)
// Misc Symbols & Arrows (⬆ ⬇ ⬅ ➡)
// Geometric Shapes Ext (🭨 etc.) — kept narrow to avoid bloat
static const ImWchar text_ranges[] = {
0x0020, 0x00FF, // Basic Latin + Latin-1
0x2010, 0x2027, // General Punctuation (em-dash, ellipsis, quotes)
0x25A0, 0x25FF, // Geometric Shapes (● ▲ ■ ◆ ...)
0x2600, 0x26FF, // Misc Symbols (★ ♥ ☑)
0x2700, 0x27BF, // Dingbats (✓ ✗ ✱)
0x2B00, 0x2BFF, // Arrows + Misc Symbols (⬆ ⬇ ⬅ ➡)
0,
};
// 1. Texto.
g_text_loaded = false;
if (s.font_id == FontId::ProggyClean) {
@@ -84,7 +102,7 @@ void load_fonts_from_settings() {
cfg.OversampleH = 2;
cfg.OversampleV = 1;
cfg.PixelSnapH = false;
if (io.Fonts->AddFontFromFileTTF(ttf.c_str(), size_px, &cfg)) {
if (io.Fonts->AddFontFromFileTTF(ttf.c_str(), size_px, &cfg, text_ranges)) {
g_text_loaded = true;
} else {
std::fprintf(stderr, "[fn_ui] AddFontFromFileTTF fallo (%s)\n", ttf.c_str());
+174
View File
@@ -0,0 +1,174 @@
// join_tables — Pure multi-key hash join between two tables.
// Promoted from cpp/apps/primitives_gallery/playground/tables/ (issue 0081-E).
// No ImGui, no I/O.
#include "core/join_tables.h"
#include <string>
#include <unordered_map>
#include <vector>
namespace data_table {
namespace {
// Linear search for `name` in `hdrs`.
// Returns the zero-based index of the first exact match, or -1 when absent.
static int find_col_idx(const std::vector<std::string>& hdrs, const std::string& name) {
    int position = 0;
    for (const std::string& header : hdrs) {
        if (header == name) {
            return position;
        }
        ++position;
    }
    return -1;
}
// Builds the composite join key for one row.
// For every entry of `key_cols`:
//   - index inside [0, cols): the cell text (null cell -> nothing) followed by
//     the \x1f unit separator;
//   - index outside that range (column not found): the two-character
//     placeholder "\x1f|", which differs from the "\x1f" produced by an empty
//     cell in a present column.
static std::string make_key(const char* const* cells, int row, int cols,
                            const std::vector<int>& key_cols) {
    std::string key;
    for (const int col : key_cols) {
        const bool col_present = (col >= 0) && (col < cols);
        if (!col_present) {
            key.append("\x1f|");
            continue;
        }
        const char* cell = cells[row * cols + col];
        if (cell != nullptr) {
            key.append(cell);
        }
        key.append("\x1f");
    }
    return key;
}
} // anon
// join_tables — hash-joins a row-major left cell matrix with `right` as
// described by `jn`.
//
// Output columns: every left column, then the selected right columns
// (jn.fields, or all right columns when jn.fields is empty; names in jn.fields
// that are not present in right.headers are skipped). Right headers are
// prefixed with "<alias>." when jn.alias is non-empty.
//
// Output rows: one row per (left, right) pair whose composite keys are equal.
// Rows without a partner are kept, with empty cells on the other side, for
// Left/Full (left rows) and Right/Full (right rows). Matched rows come first in
// left-row order; unmatched right rows are appended at the end.
//
// Every output cell is copied into out.cell_backing (null cells become "");
// out.cells points into those strings, so the result does not alias the inputs.
StageOutput join_tables(const char* const* left_cells, int left_rows, int left_cols,
                        const std::vector<std::string>& left_headers,
                        const std::vector<ColumnType>& left_types,
                        const TableInput& right,
                        const Join& jn)
{
    StageOutput out;

    // A negative right.rows is treated as an empty table; otherwise it would be
    // converted to a huge unsigned size by the containers sized from it below.
    const int right_rows = right.rows > 0 ? right.rows : 0;

    // Resolve key column indices in left and right (-1 when a name is unknown).
    std::vector<int> lk_idx, rk_idx;
    for (const auto& p : jn.on) {
        lk_idx.push_back(find_col_idx(left_headers, p.first));
        rk_idx.push_back(find_col_idx(right.headers, p.second));
    }

    // Determine which right columns to include in the output.
    std::vector<int> right_fields;
    if (jn.fields.empty()) {
        for (int i = 0; i < right.cols; ++i) right_fields.push_back(i);
    } else {
        for (const auto& f : jn.fields) {
            int i = find_col_idx(right.headers, f);
            if (i >= 0) right_fields.push_back(i);
        }
    }

    // Build output headers + types: all left columns, then right fields (aliased).
    out.cols = left_cols + (int)right_fields.size();
    out.headers.reserve(out.cols > 0 ? (size_t)out.cols : 0);
    out.types.reserve(out.cols > 0 ? (size_t)out.cols : 0);
    for (int c = 0; c < left_cols; ++c) {
        out.headers.push_back(c < (int)left_headers.size() ? left_headers[c] : "");
        out.types.push_back(c < (int)left_types.size() ? left_types[c] : ColumnType::Auto);
    }
    for (int rc : right_fields) {
        std::string name = (rc < (int)right.headers.size()) ? right.headers[rc] : "";
        std::string prefixed = jn.alias.empty() ? name : (jn.alias + "." + name);
        out.headers.push_back(std::move(prefixed));
        out.types.push_back(rc < (int)right.types.size() ? right.types[rc] : ColumnType::Auto);
    }

    // Hash right rows by composite key.
    std::unordered_map<std::string, std::vector<int>> right_idx;
    right_idx.reserve((size_t)right_rows);
    for (int r = 0; r < right_rows; ++r) {
        right_idx[make_key(right.cells, r, right.cols, rk_idx)].push_back(r);
    }

    // Track which right rows matched (needed for right/full outer).
    std::vector<bool> right_matched((size_t)right_rows, false);

    // Capacity hint only: duplicate keys can produce more rows than this. Growth
    // is harmless because out.cells is filled after all strings are appended.
    const size_t hint_rows = (size_t)(left_rows > 0 ? left_rows : 0) + (size_t)right_rows;
    const size_t hint_cols = out.cols > 0 ? (size_t)out.cols : 0;
    out.cell_backing.reserve(hint_rows * hint_cols);

    // Helpers that append one half of an output row to cell_backing.
    auto append_left_row = [&](int lr) {
        for (int c = 0; c < left_cols; ++c) {
            const char* s = left_cells[(size_t)lr * (size_t)left_cols + (size_t)c];
            out.cell_backing.emplace_back(s ? s : "");
        }
    };
    auto append_left_empty = [&]() {
        for (int c = 0; c < left_cols; ++c) out.cell_backing.emplace_back("");
    };
    auto append_right_row = [&](int rr) {
        for (int rc : right_fields) {
            const bool readable = right.cells && rr >= 0 && rr < right_rows &&
                                  rc >= 0 && rc < right.cols;
            const char* s = readable
                ? right.cells[(size_t)rr * (size_t)right.cols + (size_t)rc]
                : nullptr;
            out.cell_backing.emplace_back(s ? s : "");
        }
    };
    auto append_right_empty = [&]() {
        for (size_t k = 0; k < right_fields.size(); ++k) out.cell_backing.emplace_back("");
    };

    // Strategy flags.
    const bool keep_unmatched_left =
        (jn.strategy == JoinStrategy::Left || jn.strategy == JoinStrategy::Full);
    const bool keep_unmatched_right =
        (jn.strategy == JoinStrategy::Right || jn.strategy == JoinStrategy::Full);

    int row_count = 0;

    // The left side is always scanned, even for a Right join, because that is
    // how matching right rows are discovered and marked.
    for (int lr = 0; lr < left_rows; ++lr) {
        const std::string k = make_key(left_cells, lr, left_cols, lk_idx);
        auto it = right_idx.find(k);
        if (it == right_idx.end() || it->second.empty()) {
            // No partner on the right: keep the row only for Left/Full.
            if (keep_unmatched_left) {
                append_left_row(lr);
                append_right_empty();
                ++row_count;
            }
            continue;
        }
        // Matched: one output row per matching right row (Cartesian product for
        // duplicate keys). Every strategy emits matched rows the same way.
        for (int rr : it->second) {
            append_left_row(lr);
            append_right_row(rr);
            right_matched[(size_t)rr] = true;
            ++row_count;
        }
    }

    // Append unmatched right rows (Right join / Full outer).
    if (keep_unmatched_right) {
        for (int rr = 0; rr < right_rows; ++rr) {
            if (right_matched[(size_t)rr]) continue;
            append_left_empty();
            append_right_row(rr);
            ++row_count;
        }
    }

    out.rows = row_count;

    // Build the pointer list only now, when cell_backing can no longer grow.
    out.cells.reserve(out.cell_backing.size());
    for (auto& s : out.cell_backing) out.cells.push_back(s.c_str());

    return out;
}
} // namespace data_table
+38
View File
@@ -0,0 +1,38 @@
// join_tables — Pure multi-key hash join between two tables.
// Promoted from cpp/apps/primitives_gallery/playground/tables/ (issue 0081-E).
// No ImGui, no I/O. Depends only on data_table_types.h.
#pragma once
#include "core/data_table_types.h"
#include <string>
#include <vector>
namespace data_table {
// join_tables — Pure: applies one Join spec over a left table and a TableInput right.
//
// Strategy:
// Inner — only rows with at least one matching key in both sides.
// Left — all left rows; unmatched left rows get empty right cells.
// Right — all right rows; unmatched right rows get empty left cells.
// Full — union of left and right: unmatched rows from both sides included.
//
// Multi-key: jn.on is a list of {left_col, right_col} pairs; all pairs must
// match for a row to be considered a hit (AND semantics). A key column whose
// name is not found in the headers contributes a "\x1f|" placeholder to the
// key instead of a cell value (it is not treated as an empty string).
//
// Key duplicates: produces Cartesian product for matching rows. Each (left_i,
// right_j) pair where both keys match becomes a separate output row.
//
// Output columns: all left columns, then the right columns listed in jn.fields
// (or all right columns if jn.fields is empty). Right column names are prefixed
// with "jn.alias." when jn.alias is non-empty.
//
// Returns a StageOutput owning its cell_backing and cells pointer list.
StageOutput join_tables(const char* const* left_cells, int left_rows, int left_cols,
const std::vector<std::string>& left_headers,
const std::vector<ColumnType>& left_types,
const TableInput& right,
const Join& jn);
} // namespace data_table
+111
View File
@@ -0,0 +1,111 @@
---
name: join_tables
kind: function
lang: cpp
domain: core
version: "1.0.0"
purity: pure
signature: "data_table::StageOutput data_table::join_tables(const char* const* left_cells, int left_rows, int left_cols, const std::vector<std::string>& left_headers, const std::vector<ColumnType>& left_types, const TableInput& right, const Join& jn)"
description: "Hash join multi-key entre dos tablas con 4 estrategias: inner, left, right y full outer. Soporta clave compuesta (N pares left_col/right_col), produce producto cartesiano para claves duplicadas, prefija columnas del derecho con alias cuando se especifica, y permite seleccionar un subconjunto de columnas del derecho via fields."
tags: [tables, join, tql, pure, cpp-tables]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: []
tested: true
tests:
- "inner join basico"
- "left join con orphans"
- "right join con orphans"
- "full outer join"
- "multi-key join con 2 keys"
- "key con duplicados produce producto cartesiano"
- "alias prefija columnas del derecho"
- "fields subset incluye solo columnas especificadas"
- "right table vacia con left join devuelve todas las filas izquierda"
test_file_path: "cpp/tests/test_join_tables.cpp"
file_path: "cpp/functions/core/join_tables.cpp"
params:
- name: left_cells
desc: "Puntero row-major a las celdas de la tabla izquierda. Cada celda es const char* (null se trata como cadena vacia)."
- name: left_rows
desc: "Numero de filas de la tabla izquierda."
- name: left_cols
desc: "Numero de columnas de la tabla izquierda."
- name: left_headers
desc: "Nombres de columna de la tabla izquierda, en el mismo orden que left_cells."
- name: left_types
desc: "Tipos de columna de la tabla izquierda (ColumnType::Auto si no se conocen)."
- name: right
desc: "TableInput de la tabla derecha: name, headers, types, cells (row-major), rows, cols."
- name: jn
desc: "Join spec: strategy (Inner/Left/Right/Full), on (lista de pares {left_col,right_col}), alias (prefijo para columnas del derecho), fields (subconjunto de columnas del derecho; vacio = todas)."
output: "StageOutput con todas las filas resultado: cell_backing (strings owned), cells (punteros row-major), rows, cols, headers y types del output combinado."
---
# join_tables
Extrae e implementa la logica de join del playground TQL
(`primitives_gallery/playground/tables/data_table_logic.cpp`).
Pura: sin ImGui, sin I/O, sin estado global.
## Ejemplo
```cpp
#include "core/join_tables.h"
using namespace data_table;
// Tabla izquierda: usuarios
std::vector<std::string> lhdr = {"id", "name"};
std::vector<ColumnType> ltyp = {ColumnType::Int, ColumnType::String};
const char* lcells[] = { "1","Alice", "2","Bob", "3","Carol" };
// Tabla derecha: pedidos
TableInput right;
right.name = "orders";
right.headers = {"user_id", "amount"};
right.types = {ColumnType::Int, ColumnType::Float};
const char* rcells[] = { "1","100", "2","200", "2","150" };
right.cells = rcells;
right.rows = 3;
right.cols = 2;
// Inner join por id=user_id
Join jn;
jn.strategy = JoinStrategy::Inner;
jn.on = {{"id", "user_id"}};
jn.alias = "orders";
StageOutput res = join_tables(lcells, 3, 2, lhdr, ltyp, right, jn);
// res.rows == 3 (Alice->100, Bob->200, Bob->150; Carol sin match no aparece)
// res.headers == {"id","name","orders.user_id","orders.amount"}
```
## Cuando usarla
Cuando necesites combinar dos StageOutput o TableInput de distinto origen
antes de pasarlos a `compute_stage`. Por ejemplo, enriquecer una tabla de
eventos con metadatos de usuarios, o cruzar dos queries de Metabase antes
de agregar.
## Gotchas
- **Key con NULL/vacia**: una celda null o vacía en la clave se trata como
cadena vacía. Dos filas con clave vacía SI se emparejan entre sí (comportamiento
SQL-atípico; SQL NULL != NULL). Filtra filas con clave vacía antes del join
si no quieres este comportamiento.
- **Key compuesta con columna ausente**: si un nombre de columna en `jn.on`
no existe en los headers, ese componente de la clave se reemplaza por `\x1f|`
(un placeholder en lugar del valor de la celda), lo que normalmente impide
matches para esa columna. Ojo: si la columna falta en ambos lados, los dos
placeholders son iguales y ese componente sí hace match. Verifica los nombres
de columna antes de llamar.
- **Tipos mixtos**: la comparación de claves es siempre lexicográfica sobre
strings. `"10"` y `"10.0"` NO hacen match aunque sean el mismo número.
Normaliza los tipos antes del join si es necesario.
- **Producto cartesiano**: con claves duplicadas en ambos lados, el output
crece como N_left_matches × N_right_matches. Un join sin filtrar sobre
tablas grandes con claves duplicadas puede producir salida muy grande.
- **Columnas del output**: el output tiene siempre left_cols + len(right_fields)
columnas, en ese orden. Las columnas de la clave del derecho aparecen
duplicadas si no se usa `fields` para excluirlas.
+289
View File
@@ -0,0 +1,289 @@
// llm_anthropic.cpp — Anthropic Messages API client via cURL popen.
// Promoted from primitives_gallery playground to registry, issue 0081.
// No link-time HTTP library. cURL must be in PATH at runtime.
#include "core/llm_anthropic.h"
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <string>
namespace llm_anthropic {
using namespace data_table;
namespace {
// Escapes `s` for embedding inside a JSON string literal (without the quotes).
// Quote, backslash and the common control characters get their short escapes;
// any other byte below 0x20 is written as \u00XX. All other bytes are copied
// through unchanged.
std::string json_escape(const std::string& s) {
    std::string escaped;
    escaped.reserve(s.size() + 8);
    for (const char ch : s) {
        const unsigned char byte = (unsigned char)ch;
        if (ch == '"') {
            escaped += "\\\"";
        } else if (ch == '\\') {
            escaped += "\\\\";
        } else if (ch == '\n') {
            escaped += "\\n";
        } else if (ch == '\r') {
            escaped += "\\r";
        } else if (ch == '\t') {
            escaped += "\\t";
        } else if (ch == '\b') {
            escaped += "\\b";
        } else if (ch == '\f') {
            escaped += "\\f";
        } else if (byte < 0x20) {
            char hex[8];
            std::snprintf(hex, sizeof(hex), "\\u%04x", (int)byte);
            escaped += hex;
        } else {
            escaped += ch;
        }
    }
    return escaped;
}
// Lower-case type name used when describing a column to the model.
// Returns "?" for a value that matches none of the listed enumerators.
const char* col_type_doc(ColumnType t) {
    const char* label = "?";
    switch (t) {
        case ColumnType::String: label = "string"; break;
        case ColumnType::Int:    label = "int";    break;
        case ColumnType::Float:  label = "float";  break;
        case ColumnType::Bool:   label = "bool";   break;
        case ColumnType::Date:   label = "date";   break;
        case ColumnType::Json:   label = "json";   break;
        case ColumnType::Auto:   label = "auto";   break;
    }
    return label;
}
std::string build_schema_block(const AskInput& in) {
std::ostringstream os;
os << "Available columns (stage 0 input):\n";
for (size_t i = 0; i < in.col_names.size(); ++i) {
os << " - " << in.col_names[i] << ": "
<< col_type_doc(i < in.col_types.size() ? in.col_types[i] : ColumnType::String)
<< "\n";
}
if (!in.joinable_names.empty()) {
os << "Joinable tables (for join clause):\n";
for (const auto& n : in.joinable_names) os << " - " << n << "\n";
}
return os.str();
}
// Returns the fixed system prompt for the requested output mode.
// OutputMode::TQL yields the TQL/Lua instructions; any other mode yields the
// DuckDB SQL instructions. The text is sent to the model verbatim, so editing
// any literal below changes what the model is asked to do.
std::string build_system_prompt(OutputMode mode) {
if (mode == OutputMode::TQL) {
// TQL prompt: describes the Lua table shape and asks for a ```lua fenced block.
return
"You are a TQL (Table Query Language) expert. Output ONLY a Lua code block. "
"TQL is a Lua table with shape:\n"
" return { version=1, display=\"table\"|\"bar\"|\"line\"|...,\n"
" main_source=\"name\", joins={ {alias,source,on,strategy,fields},... },\n"
" stages={ {filter={{op,col,value},...}, breakout={...}, aggregation={...}, sort={...} },... },\n"
" columns={ name = {type=\"int|float|...\", formula=\"[col]+1\"},... }\n"
" }\n"
"Stage 0 = Raw (filters + derived + sort, NO breakouts/aggs).\n"
"Stage 1+ groups (breakouts + aggregations).\n"
"Breakout granularity: append :year|:month|:week|:day|:hour to col name.\n"
"Aggregation functions: count|sum|avg|min|max|distinct|stddev|median|p25|p75|p90|p99|percentile.\n"
"Filter ops: '='|'!='|'<'|'<='|'>'|'>='|'contains'|'!contains'|'starts'|'ends'.\n"
"Sort: {{dir, col}, ...} where dir = 'asc'|'desc'.\n"
"Join strategies: 'left'|'inner'|'right'|'full'.\n"
"Formulas use Lua expression syntax with [col] for column refs.\n"
"Output format: ```lua\\n...\\n```";
}
// SQL prompt: asks for a ```sql fenced block targeting DuckDB.
return
"You are a DuckDB SQL expert. Output ONLY a SQL code block compatible with DuckDB.\n"
"Use CTEs to chain stages. Use date_trunc('month', col) for granularity.\n"
"Use quantile_cont(col, p) for percentiles. Use ? for bound params.\n"
"Joins: LEFT/INNER/RIGHT/FULL OUTER JOIN. String concat: ||. Aggregations: standard SQL.\n"
"Output format: ```sql\\n...\\n```";
}
// Resolves the API key, first non-empty source wins:
//   1. the `provided` argument,
//   2. the FN_LLM_API_KEY environment variable,
//   3. the first line printed by `pass anthropic/api-key`.
// Returns "" when the pipe cannot be opened or prints nothing.
std::string resolve_api_key(const std::string& provided) {
    if (!provided.empty()) {
        return provided;
    }

    const char* from_env = std::getenv("FN_LLM_API_KEY");
    if (from_env != nullptr && from_env[0] != '\0') {
        return std::string(from_env);
    }

    // pass anthropic/api-key | head -n1
    FILE* pipe = popen("pass anthropic/api-key 2>/dev/null | head -n1", "r");
    if (pipe == nullptr) {
        return std::string();
    }
    std::string key;
    char chunk[256];
    while (fgets(chunk, sizeof(chunk), pipe) != nullptr) {
        key.append(chunk);
    }
    pclose(pipe);

    // Drop the trailing line terminator(s) left by the command output.
    const std::string::size_type last_kept = key.find_last_not_of("\r\n");
    key.erase(last_kept == std::string::npos ? 0 : last_kept + 1);
    return key;
}
} // anon
// Builds the JSON body for the Messages API request: model (falls back to
// "claude-sonnet-4-6" when in.model is empty), max_tokens, the mode-specific
// system prompt and a single user message made of the question, the schema
// block and, when present, the current TQL in a ```lua fence.
std::string build_request_body(const AskInput& in) {
    const std::string system_msg = build_system_prompt(in.mode);

    std::string user_text = "Question: ";
    user_text += in.question;
    user_text += "\n\n";
    user_text += build_schema_block(in);
    user_text += "\n";
    if (!in.tql_current.empty()) {
        user_text += "Current TQL:\n```lua\n";
        user_text += in.tql_current;
        user_text += "\n```\n";
    }

    const std::string model = in.model.empty() ? std::string("claude-sonnet-4-6") : in.model;

    std::ostringstream json;
    json << "{";
    json << "\"model\":\"" << json_escape(model) << "\",";
    json << "\"max_tokens\":" << in.max_tokens << ",";
    json << "\"system\":\"" << json_escape(system_msg) << "\",";
    json << "\"messages\":[{";
    json << "\"role\":\"user\",";
    json << "\"content\":\"" << json_escape(user_text) << "\"";
    json << "}]";
    json << "}";
    return json.str();
}
// Extracts the contents of the first fenced code block from `raw`.
//
// Lookup order:
//   1. the first "```<lang>" fence;
//   2. otherwise the first bare "```" fence, skipping any alphanumeric
//      language tag that follows it.
// When no fence exists, `raw` is returned with leading/trailing whitespace
// removed. A missing closing fence means "until the end of the text".
//
// The line terminator after the opening fence may be "\n", "\r\n" or "\r";
// trailing "\r"/"\n" characters of the extracted code are removed.
std::string extract_code_block(const std::string& raw, const std::string& lang) {
    const std::string fence = "```";
    size_t code_start = std::string::npos;

    const size_t tagged = raw.find(fence + lang);
    if (tagged != std::string::npos) {
        code_start = tagged + fence.size() + lang.size();
    } else {
        const size_t bare = raw.find(fence);
        if (bare != std::string::npos) {
            code_start = bare + fence.size();
            // Skip an optional language tag (e.g. "```python").
            while (code_start < raw.size() &&
                   std::isalnum((unsigned char)raw[code_start])) {
                ++code_start;
            }
        }
    }

    if (code_start == std::string::npos) {
        // No fence — return raw stripped.
        size_t first = 0;
        while (first < raw.size() && std::isspace((unsigned char)raw[first])) ++first;
        size_t last = raw.size();
        while (last > first && std::isspace((unsigned char)raw[last - 1])) --last;
        return raw.substr(first, last - first);
    }

    // Skip the line terminator after the fence. Handling '\r' first keeps a
    // CRLF response from leaving "\r\n" at the start of the extracted code.
    if (code_start < raw.size() && raw[code_start] == '\r') ++code_start;
    if (code_start < raw.size() && raw[code_start] == '\n') ++code_start;

    size_t code_end = raw.find(fence, code_start);
    if (code_end == std::string::npos) code_end = raw.size();

    std::string code = raw.substr(code_start, code_end - code_start);
    // Trim trailing line terminators.
    while (!code.empty() && (code.back() == '\n' || code.back() == '\r')) code.pop_back();
    return code;
}
// Extracts the first JSON string value stored under a "text" key.
//
// This is a targeted scan, not a JSON parser: it looks for the token "text"
// followed by optional spaces/tabs/colons and an opening quote, then decodes
// the string up to the next unescaped quote. Occurrences of "text" that are
// not followed by a string (such as the value in "type":"text",) are skipped.
// Returns "" when no such string exists.
//
// Escapes: \n \t \r \b \f \" \\ \/ are decoded; \uXXXX is decoded to UTF-8,
// including surrogate pairs (an unpaired surrogate becomes U+FFFD). Any other
// escaped character, and a \u without four hex digits, is copied literally.
std::string parse_response_text(const std::string& json) {
    static const char kKey[] = "\"text\"";
    const size_t key_len = sizeof(kKey) - 1;
    const size_t size = json.size();

    // Reads four hex digits starting at `at`; false if missing or invalid.
    auto read_hex4 = [&](size_t at, unsigned& value) -> bool {
        if (at + 4 > size) return false;
        unsigned v = 0;
        for (size_t k = 0; k < 4; ++k) {
            const char c = json[at + k];
            unsigned digit;
            if (c >= '0' && c <= '9')      digit = (unsigned)(c - '0');
            else if (c >= 'a' && c <= 'f') digit = (unsigned)(c - 'a') + 10u;
            else if (c >= 'A' && c <= 'F') digit = (unsigned)(c - 'A') + 10u;
            else return false;
            v = v * 16u + digit;
        }
        value = v;
        return true;
    };

    // Appends code point `cp` (<= 0x10FFFF) to `dst` as UTF-8.
    auto append_utf8 = [](std::string& dst, unsigned cp) {
        if (cp < 0x80u) {
            dst += (char)cp;
        } else if (cp < 0x800u) {
            dst += (char)(0xC0u | (cp >> 6));
            dst += (char)(0x80u | (cp & 0x3Fu));
        } else if (cp < 0x10000u) {
            dst += (char)(0xE0u | (cp >> 12));
            dst += (char)(0x80u | ((cp >> 6) & 0x3Fu));
            dst += (char)(0x80u | (cp & 0x3Fu));
        } else {
            dst += (char)(0xF0u | (cp >> 18));
            dst += (char)(0x80u | ((cp >> 12) & 0x3Fu));
            dst += (char)(0x80u | ((cp >> 6) & 0x3Fu));
            dst += (char)(0x80u | (cp & 0x3Fu));
        }
    };

    for (size_t t = json.find(kKey); t != std::string::npos; t = json.find(kKey, t + 1)) {
        size_t i = t + key_len;
        // Skip whitespace and ':' between the key and its value.
        while (i < size && (json[i] == ' ' || json[i] == ':' || json[i] == '\t')) ++i;
        if (i >= size || json[i] != '"') continue;  // not a string value
        ++i;

        std::string out;
        while (i < size && json[i] != '"') {
            if (json[i] != '\\' || i + 1 >= size) {
                out += json[i++];
                continue;
            }
            const char esc = json[i + 1];
            i += 2;  // consume the backslash and the escape character
            switch (esc) {
                case 'n': out += '\n'; break;
                case 't': out += '\t'; break;
                case 'r': out += '\r'; break;
                case 'b': out += '\b'; break;
                case 'f': out += '\f'; break;
                case 'u': {
                    unsigned cp = 0;
                    if (!read_hex4(i, cp)) {
                        out += 'u';  // malformed escape: keep the character
                        break;
                    }
                    i += 4;  // exactly the four hex digits, nothing more
                    if (cp >= 0xD800u && cp <= 0xDBFFu) {
                        // High surrogate: needs a following \uDC00..\uDFFF.
                        unsigned low = 0;
                        if (i + 1 < size && json[i] == '\\' && json[i + 1] == 'u' &&
                            read_hex4(i + 2, low) && low >= 0xDC00u && low <= 0xDFFFu) {
                            cp = 0x10000u + ((cp - 0xD800u) << 10) + (low - 0xDC00u);
                            i += 6;
                        } else {
                            cp = 0xFFFDu;
                        }
                    } else if (cp >= 0xDC00u && cp <= 0xDFFFu) {
                        cp = 0xFFFDu;  // lone low surrogate
                    }
                    append_utf8(out, cp);
                    break;
                }
                default:
                    // Covers '"', '\\', '/' and any unknown escape.
                    out += esc;
                    break;
            }
        }
        return out;
    }
    return "";
}
// Posts `body` to the Messages endpoint by running the `curl` executable and
// returns the raw response text.
//
//   body       JSON request body, sent unchanged via a temp file.
//   api_key    explicit key; when empty, resolve_api_key() falls back to the
//              environment / `pass`.
//   error_out  cleared on entry; set to a description on failure.
// Returns "" on failure.
//
// Environment:
//   FN_LLM_MOCK_RESPONSE  when non-empty it is returned directly and nothing
//                         is executed (test injection).
//   FN_LLM_ENDPOINT       overrides the default endpoint URL.
//
// NOTE(review): std::tmpnam only produces names, so another process could
// create the files first, and the API key is visible on the curl command line.
// Moving to mkstemp / a curl config file would need platform-specific code.
std::string call_api(const std::string& body, const std::string& api_key,
                     std::string& error_out) {
    error_out.clear();

    // Test injection: if FN_LLM_MOCK_RESPONSE is set, return it directly.
    const char* mock = std::getenv("FN_LLM_MOCK_RESPONSE");
    if (mock && *mock) return mock;

    const std::string key = resolve_api_key(api_key);
    if (key.empty()) {
        error_out = "no API key (set FN_LLM_API_KEY env, pass param, or `pass anthropic/api-key`)";
        return "";
    }

    const char* endpoint_env = std::getenv("FN_LLM_ENDPOINT");
    const std::string endpoint = (endpoint_env && *endpoint_env)
        ? endpoint_env
        : "https://api.anthropic.com/v1/messages";

    // Key and endpoint are placed inside double quotes in a shell command.
    // Refuse anything that could end the quoting or trigger expansion there.
    auto shell_safe = [](const std::string& s) {
        return s.find_first_of("\"`$\\\r\n") == std::string::npos;
    };
    if (!shell_safe(key)) {
        error_out = "API key contains characters that are unsafe in a shell command";
        return "";
    }
    if (!shell_safe(endpoint)) {
        error_out = "endpoint contains characters that are unsafe in a shell command";
        return "";
    }

    // Write body to tmp file, invoke curl, read response from tmp output file.
    // Portable Unix/MinGW (popen "w+" is not portable).
    // tmpnam may return null and reuses one static buffer, so each name is
    // checked and copied before the next call.
    const char* in_name = std::tmpnam(nullptr);
    const std::string tmp_in = in_name ? in_name : "";
    const char* out_name = std::tmpnam(nullptr);
    const std::string tmp_out = out_name ? out_name : "";
    if (tmp_in.empty() || tmp_out.empty()) {
        error_out = "tmp file name fail";
        return "";
    }

    {
        FILE* f = std::fopen(tmp_in.c_str(), "w");
        if (!f) { error_out = "tmp file write fail"; return ""; }
        const size_t written = std::fwrite(body.data(), 1, body.size(), f);
        const bool closed_ok = (std::fclose(f) == 0);
        if (written != body.size() || !closed_ok) {
            // Never send a truncated request body.
            std::remove(tmp_in.c_str());
            error_out = "tmp file write fail";
            return "";
        }
    }

    const std::string cmd = "curl -sS -X POST "
        "-H \"content-type: application/json\" "
        "-H \"anthropic-version: 2023-06-01\" "
        "-H \"x-api-key: " + key + "\" "
        "--data-binary \"@" + tmp_in + "\" \"" + endpoint + "\""
        " > \"" + tmp_out + "\" 2>&1";
    const int rc = std::system(cmd.c_str());

    std::string resp;
    {
        FILE* f = std::fopen(tmp_out.c_str(), "r");
        if (f) {
            char buf[4096];
            size_t n;
            while ((n = std::fread(buf, 1, sizeof(buf), f)) > 0) resp.append(buf, n);
            std::fclose(f);
        }
    }
    std::remove(tmp_in.c_str());
    std::remove(tmp_out.c_str());

    if (rc != 0) {
        // stderr was redirected into the output file, so resp holds curl's message.
        error_out = "curl exit " + std::to_string(rc) + ": " + resp;
        return "";
    }
    return resp;
}
// End-to-end helper: builds the request, calls the API, and on success fills
// `raw` with the response text and `code` with the extracted lua/sql block.
// On failure only `error` is set.
AskResult ask(const AskInput& in, const std::string& api_key) {
    AskResult result;
    const std::string request = build_request_body(in);
    const std::string response_json = call_api(request, api_key, result.error);
    if (result.error.empty()) {
        result.raw = parse_response_text(response_json);
        const std::string fence_lang = (in.mode == OutputMode::TQL) ? "lua" : "sql";
        result.code = extract_code_block(result.raw, fence_lang);
    }
    return result;
}
} // namespace llm_anthropic
+61
View File
@@ -0,0 +1,61 @@
// llm_anthropic — Minimal Anthropic Messages API client that shells out to curl.
// No external library deps — the `curl` executable is run via std::system,
// with the request body and the response passed through temporary files.
// Promoted from primitives_gallery playground to registry, issue 0081.
// Used by the "Ask AI" panel in data_table. Domain: core (logic + HTTP via the
// curl process, no link-time HTTP library required).
#pragma once
#include "core/data_table_types.h"
#include <string>
#include <vector>
namespace llm_anthropic {
// Language the model is asked to produce. ask() extracts a ```lua fence for
// TQL and a ```sql fence for SQL.
enum class OutputMode { TQL, SQL };
// Everything needed to build one request: the question plus the table schema
// that gives the model context.
struct AskInput {
std::string question; // natural-language question
std::string tql_current; // current TQL (emitted)
std::vector<std::string> col_names; // schema input: column names
std::vector<data_table::ColumnType> col_types; // schema input: column types (presumably parallel to col_names — confirm in build_request_body)
std::vector<std::string> joinable_names; // joinable tables
OutputMode mode = OutputMode::TQL; // requested output language
std::string model; // empty -> default
int max_tokens = 8192; // presumably forwarded as the request's max_tokens — confirm in build_request_body
};
// Outcome of ask(). Check `error` first: when it is non-empty the other
// fields keep their defaults.
struct AskResult {
std::string code; // extracted ```lua or ```sql block (no fences)
std::string raw; // full response text
std::string error; // non-empty on failure
int tokens_in = 0; // not filled in by ask(); stays 0
int tokens_out = 0; // not filled in by ask(); stays 0
};
// Pure: builds the system prompt + user message JSON-escaped.
// Returns the full JSON body for POST to /v1/messages.
std::string build_request_body(const AskInput& in);
// Pure: extracts first ```<lang>\n...\n``` block from `raw`. lang = "lua"|"sql".
// If no fence found, returns raw stripped of leading/trailing whitespace.
std::string extract_code_block(const std::string& raw, const std::string& lang);
// Pure: extracts text from Anthropic JSON response body.
// Finds `"content":[{"type":"text","text":"..."}]` and returns the text.
std::string parse_response_text(const std::string& json_body);
// Impure: runs the `curl` executable via std::system (request body and
// response go through temporary files), posts `body` to Anthropic /v1/messages,
// returns raw response body (JSON). API key resolution order:
// 1. `api_key` parameter (if non-empty)
// 2. env var FN_LLM_API_KEY
// 3. `pass anthropic/api-key | head -n1`
// If env FN_LLM_MOCK_RESPONSE is set, returns its value (test injection).
// If env FN_LLM_ENDPOINT is set, it replaces the default endpoint URL.
// On error, sets error_out and returns "".
std::string call_api(const std::string& body, const std::string& api_key,
std::string& error_out);
// Impure: orchestrator — build_request_body + call_api + parse_response_text.
// Convenience wrapper for the "Ask AI" panel.
AskResult ask(const AskInput& in, const std::string& api_key = "");
} // namespace llm_anthropic
+80
View File
@@ -0,0 +1,80 @@
---
name: llm_anthropic
kind: function
lang: cpp
domain: core
version: "1.0.0"
purity: impure
signature: "llm_anthropic::AskResult llm_anthropic::ask(const llm_anthropic::AskInput& in, const std::string& api_key = \"\")"
description: "Minimal Anthropic Messages API client (Claude) via cURL popen. Builds a TQL or SQL expert prompt from column schema + NL question, POSTs to /v1/messages, and returns the extracted code block. Used by the Ask AI panel in data_table."
tags: [llm, anthropic, http, tables, cpp-tables]
uses_functions: []
uses_types:
- data_table_types_cpp_core
returns: []
returns_optional: false
error_type: "error_go_core"
imports:
- "core/llm_anthropic.h"
tested: true
tests:
- "build_request_body: contains model field"
- "build_request_body: contains messages array"
- "build_request_body: question appears in user content"
- "build_request_body: col_names appear in schema block"
- "build_request_body: custom model overrides default"
- "build_request_body: SQL mode produces SQL system prompt"
- "build_request_body: current TQL appears in body"
- "build_request_body: joinable tables appear in schema"
- "extract_code_block: extracts lua fence"
- "extract_code_block: extracts sql fence"
- "extract_code_block: fallback to plain fence without lang"
- "extract_code_block: no fence returns stripped raw"
- "parse_response_text: extracts text from Anthropic response JSON"
- "parse_response_text: handles escaped newline in text"
- "parse_response_text: empty text on missing field"
- "call_api: mock response injection via env var"
- "ask: mock roundtrip produces code block"
test_file_path: "cpp/tests/test_llm_anthropic.cpp"
file_path: "cpp/functions/core/llm_anthropic.cpp"
params:
- name: in
desc: "AskInput with: question (NL), col_names + col_types (table schema), tql_current (optional current TQL to refine), joinable_names (available tables for JOIN), mode (TQL or SQL output), model (empty = claude-sonnet-4-6), max_tokens."
- name: api_key
desc: "Anthropic API key. If empty, resolved from env FN_LLM_API_KEY, then `pass anthropic/api-key | head -n1`."
output: "AskResult with: code (extracted Lua or SQL block, no fences), raw (full response text), error (non-empty on failure), tokens_in/tokens_out (0 in v1 — not yet parsed from response)."
---
## Ejemplo
```cpp
#include "core/llm_anthropic.h"
llm_anthropic::AskInput in;
in.question = "show me total sales grouped by region";
in.col_names = {"region", "amount", "date"};
in.col_types = {data_table::ColumnType::String,
data_table::ColumnType::Float,
data_table::ColumnType::Date};
in.mode = llm_anthropic::OutputMode::TQL;
// API key from env FN_LLM_API_KEY or pass anthropic/api-key
llm_anthropic::AskResult r = llm_anthropic::ask(in);
if (r.error.empty()) {
// r.code -> "return { version=1, display=\"bar\", ... }"
}
```
## Cuando usarla
Cuando el usuario hace una pregunta en lenguaje natural sobre una tabla en el panel Ask AI de data_table, y necesitas generar TQL o SQL equivalente via Claude. El caller aplica el TQL con `tql_apply` o ejecuta el SQL via DuckDB.
## Gotchas
- **API key obligatoria**: sin FN_LLM_API_KEY ni `pass anthropic/api-key`, `ask` devuelve `error` con el mensaje. No crashea.
- **cURL en PATH**: la implementacion usa `std::system("curl ...")` + archivos temporales. Si `curl` no está disponible, el exit code != 0 y el error se captura en `AskResult.error`.
- **Rate limits**: la API de Anthropic tiene rate limits por tier. Errores 429 aparecen en el JSON de respuesta; actualmente no hay retry — el caller debe manejarlos.
- **Tmpnam deprecated**: usa `std::tmpnam` — genera warning en compiladores modernos. Suficiente para uso interactivo; no apto para alta concurrencia.
- **Test injection**: para tests sin API key, setea `FN_LLM_MOCK_RESPONSE` con el JSON de respuesta Anthropic deseado. `call_api` lo retorna directamente sin hacer HTTP.
- **tokens_in/tokens_out**: siempre 0 en v1 (el campo existe en la respuesta JSON pero no se parsea aún).
- **FN_LLM_ENDPOINT**: override del endpoint para pruebas con proxies o endpoints alternativos.
+577
View File
@@ -0,0 +1,577 @@
// lua_engine.cpp — implementacion del sandbox Lua 5.4 para formulas.
// Extraido del playground tables (issue 0081-D).
#include "core/lua_engine.h"
#include "core/auto_detect_type.h" // parse_number, is_bool_text, etc.
extern "C" {
#include "lua.h"
#include "lualib.h"
#include "lauxlib.h"
}
#include <cctype>
#include <cstring>
#include <cstdio>
#include <string>
namespace lua_engine {
// Interpreter state shared by every compiled formula.
struct Engine {
lua_State* L = nullptr; // Lua state; created in get(), closed in shutdown()
std::vector<RowCtx*> ctx_stack; // row contexts of evaluations in progress; back() is the innermost
std::vector<int> visiting_derived; // derived-column indices currently being evaluated through row_index (cycle guard)
};
namespace {
Engine* g_engine = nullptr;
// Recovers the Engine pointer that get() stored in the state's extra space.
Engine* engine_from_state(lua_State* L) {
    Engine** slot = static_cast<Engine**>(lua_getextraspace(L));
    return *slot;
}
// Innermost row context being evaluated on this state, or nullptr when no
// evaluation is in progress.
RowCtx* current_ctx(lua_State* L) {
    Engine* const eng = engine_from_state(L);
    const bool has_ctx = eng != nullptr && !eng->ctx_stack.empty();
    return has_ctx ? eng->ctx_stack.back() : nullptr;
}
// ---------------------------------------------------------------------------
// Pushes one cell onto the Lua stack, honouring the declared column type:
//   null / empty text  -> nil
//   Int / Float / Auto -> number when parse_number accepts the text (integer
//                         if the value is integral and fits in 64 bits,
//                         float otherwise); string when it does not parse
//   Bool               -> boolean for "true"/"1"/"false"/"0"; else string
//   String/Date/Json   -> string
// ---------------------------------------------------------------------------
void push_typed(lua_State* L, const char* v, data_table::ColumnType t) {
    if (!v || !*v) { lua_pushnil(L); return; }
    using data_table::ColumnType;
    using data_table::parse_number;

    if (t == ColumnType::Int || t == ColumnType::Float || t == ColumnType::Auto) {
        double d;
        if (!parse_number(v, d)) { lua_pushstring(L, v); return; }
        // Converting NaN, infinity or an out-of-range double to an integer is
        // undefined behaviour, so check the range before casting. Both bounds
        // (-2^63 and 2^63) are exactly representable as double.
        const bool in_range = d >= -9223372036854775808.0 && d < 9223372036854775808.0;
        if (in_range) {
            const long long iv = static_cast<long long>(d);
            if (static_cast<double>(iv) == d) {
                lua_pushinteger(L, static_cast<lua_Integer>(iv));
                return;
            }
        }
        lua_pushnumber(L, static_cast<lua_Number>(d));
        return;
    }
    if (t == ColumnType::Bool) {
        if (std::strcmp(v, "true") == 0 || std::strcmp(v, "1") == 0) lua_pushboolean(L, 1);
        else if (std::strcmp(v, "false") == 0 || std::strcmp(v, "0") == 0) lua_pushboolean(L, 0);
        else lua_pushstring(L, v);
        return;
    }
    // String / Date / Json
    lua_pushstring(L, v);
}
// Fwd: para recursion en row_index.
std::string eval_internal(Engine* e, int id, const RowCtx& ctx, std::string* err_out);
// __index handler of the row userdata: resolves row[key] against the
// innermost active RowCtx and always returns exactly one value.
//   string key -> original column with that name, else derived column with
//                 that name
//   number key -> 1-based index into the original columns
// Anything that cannot be resolved (unknown name, out-of-range index, cycle
// between derived columns, uncompiled formula) yields nil.
int row_index(lua_State* L) {
    Engine* eng = engine_from_state(L);
    RowCtx* ctx = current_ctx(L);
    if (!ctx) { lua_pushnil(L); return 1; }
    using data_table::ColumnType;

    // Pushes the current row's cell for original column `col` using its
    // declared type (Auto when no type is available). Out-of-range columns
    // push nil instead of reading past the cell array.
    auto push_orig = [&](int col) {
        if (col < 0 || col >= ctx->orig_cols) { lua_pushnil(L); return; }
        ColumnType t = ColumnType::Auto;
        if (ctx->types_orig && col < ctx->n_types_orig) t = ctx->types_orig[col];
        push_typed(L, ctx->cells[ctx->row * ctx->orig_cols + col], t);
    };

    if (lua_type(L, 2) == LUA_TSTRING) {
        const char* key = lua_tostring(L, 2);
        if (ctx->name_to_col) {
            auto it = ctx->name_to_col->find(key);
            if (it != ctx->name_to_col->end()) {
                push_orig(it->second);
                return 1;
            }
        }
        if (ctx->derived_name_to_idx && ctx->derived) {
            auto it = ctx->derived_name_to_idx->find(key);
            if (it != ctx->derived_name_to_idx->end()) {
                int didx = it->second;
                if (didx < 0 || didx >= (int)ctx->derived->size()) {
                    lua_pushnil(L); return 1;
                }
                // Cycle check: a derived column already being evaluated
                // further up the call chain resolves to nil.
                for (int v : eng->visiting_derived) {
                    if (v == didx) { lua_pushnil(L); return 1; }
                }
                const auto& d = (*ctx->derived)[didx];
                if (d.formula.empty()) {
                    // Pure retype: same cell as the source column, pushed
                    // with the derived column's type.
                    if (d.source_col < 0 || d.source_col >= ctx->orig_cols) {
                        lua_pushnil(L); return 1;
                    }
                    push_typed(L, ctx->cells[ctx->row * ctx->orig_cols + d.source_col], d.type);
                } else if (d.lua_id < 0) {
                    // Formula present but not compiled.
                    lua_pushnil(L);
                } else {
                    eng->visiting_derived.push_back(didx);
                    // The error text is discarded: a failed evaluation
                    // returns "", which push_typed turns into nil.
                    std::string err;
                    std::string r = eval_internal(eng, d.lua_id, *ctx, &err);
                    eng->visiting_derived.pop_back();
                    push_typed(L, r.c_str(), d.type);
                }
                return 1;
            }
        }
        lua_pushnil(L);
        return 1;
    }
    if (lua_type(L, 2) == LUA_TNUMBER) {
        // Compare as lua_Integer so huge indices are rejected rather than
        // wrapped into range by a narrowing cast.
        const lua_Integer idx = lua_tointeger(L, 2);
        if (idx >= 1 && idx <= ctx->orig_cols) {
            push_orig(static_cast<int>(idx) - 1);
            return 1;
        }
    }
    lua_pushnil(L);
    return 1;
}
// --- fn.* builtins ---
// fn.upper(s): uppercases ASCII letters; every other byte is left untouched.
int b_upper(lua_State* L) {
    std::string text = luaL_checkstring(L, 1);
    for (size_t i = 0; i < text.size(); ++i) {
        const char ch = text[i];
        if (ch >= 'a' && ch <= 'z') text[i] = static_cast<char>(ch - 32);
    }
    lua_pushlstring(L, text.data(), text.size());
    return 1;
}
// fn.lower(s): lowercases ASCII letters; every other byte is left untouched.
int b_lower(lua_State* L) {
    std::string text = luaL_checkstring(L, 1);
    for (size_t i = 0; i < text.size(); ++i) {
        const char ch = text[i];
        if (ch >= 'A' && ch <= 'Z') text[i] = static_cast<char>(ch + 32);
    }
    lua_pushlstring(L, text.data(), text.size());
    return 1;
}
// fn.length(s): byte length of s up to the first NUL; nil counts as 0.
int b_length(lua_State* L) {
    lua_Integer len = 0;
    if (!lua_isnil(L, 1)) {
        len = (lua_Integer)std::strlen(luaL_checkstring(L, 1));
    }
    lua_pushinteger(L, len);
    return 1;
}
// fn.substring(s, start [, len]): bytes of s starting at 1-based `start`.
//   start < 1            -> treated as 1
//   start past the end   -> ""
//   len omitted or < 0   -> everything up to the end of s
//   len past the end     -> clamped to the end of s
// All arithmetic is done in lua_Integer so very large arguments are clamped
// instead of being truncated to int or overflowing.
int b_substring(lua_State* L) {
    const char* s = luaL_checkstring(L, 1);
    lua_Integer start = luaL_checkinteger(L, 2);
    const lua_Integer len = luaL_optinteger(L, 3, -1);
    const lua_Integer slen = static_cast<lua_Integer>(std::strlen(s));
    if (start < 1) start = 1;
    if (start > slen) { lua_pushlstring(L, "", 0); return 1; }
    const lua_Integer from = start - 1;
    const lua_Integer avail = slen - from;  // >= 1 here
    const lua_Integer take = (len < 0 || len > avail) ? avail : len;
    lua_pushlstring(L, s + from, static_cast<size_t>(take));
    return 1;
}
// fn.contains(haystack, needle): true when needle occurs in haystack
// (an empty needle always matches).
int b_contains(lua_State* L) {
    const char* haystack = luaL_checkstring(L, 1);
    const char* needle = luaL_checkstring(L, 2);
    const bool found = std::strstr(haystack, needle) != nullptr;
    lua_pushboolean(L, found ? 1 : 0);
    return 1;
}
// fn.starts_with(s, prefix): true when s begins with prefix.
int b_starts_with(lua_State* L) {
    const char* text = luaL_checkstring(L, 1);
    const char* prefix = luaL_checkstring(L, 2);
    const size_t prefix_len = std::strlen(prefix);
    const bool matches = std::strncmp(text, prefix, prefix_len) == 0;
    lua_pushboolean(L, matches ? 1 : 0);
    return 1;
}
// fn.ends_with(s, suffix): true when s ends with suffix.
int b_ends_with(lua_State* L) {
    const char* text = luaL_checkstring(L, 1);
    const char* suffix = luaL_checkstring(L, 2);
    const size_t text_len = std::strlen(text);
    const size_t suffix_len = std::strlen(suffix);
    bool matches = false;
    if (suffix_len <= text_len) {
        // Compare only the tail of text that has the suffix's length.
        matches = std::strcmp(text + (text_len - suffix_len), suffix) == 0;
    }
    lua_pushboolean(L, matches ? 1 : 0);
    return 1;
}
// fn.replace(s, find, repl): replaces every non-overlapping occurrence of
// `find`, scanning left to right. An empty `find` returns s unchanged.
int b_replace(lua_State* L) {
    const char* text = luaL_checkstring(L, 1);
    const char* needle = luaL_checkstring(L, 2);
    const char* replacement = luaL_checkstring(L, 3);
    const size_t needle_len = std::strlen(needle);
    if (needle_len == 0) { lua_pushstring(L, text); return 1; }

    std::string result;
    const char* cursor = text;
    while (const char* hit = std::strstr(cursor, needle)) {
        result.append(cursor, static_cast<size_t>(hit - cursor));  // text before the match
        result += replacement;
        cursor = hit + needle_len;
    }
    result += cursor;  // remainder after the last match
    lua_pushlstring(L, result.data(), result.size());
    return 1;
}
// fn.trim(s): strips leading and trailing spaces, tabs, CR and LF.
int b_trim(lua_State* L) {
    const auto is_ws = [](char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    };
    const char* first = luaL_checkstring(L, 1);
    while (is_ws(*first)) ++first;
    const char* last = first + std::strlen(first);
    while (last > first && is_ws(last[-1])) --last;
    lua_pushlstring(L, first, last - first);
    return 1;
}
// fn.concat(...): string form of every argument, joined with no separator.
int b_concat(lua_State* L) {
    const int argc = lua_gettop(L);
    std::string joined;
    int arg = 1;
    while (arg <= argc) {
        size_t len = 0;
        // luaL_tolstring pushes the converted string; copy it, then drop it.
        const char* piece = luaL_tolstring(L, arg, &len);
        joined.append(piece, len);
        lua_pop(L, 1);
        ++arg;
    }
    lua_pushlstring(L, joined.data(), joined.size());
    return 1;
}
// fn.to_number(v): converts v to a Lua number.
//   number                         -> returned unchanged
//   string Lua accepts as numeral  -> that number (integer or float)
//   string with a numeric prefix   -> the prefix parsed by strtod, as float
//   any other string               -> nil
// lua_isnumber() is deliberately not used for the first test: it is also
// true for numeric strings, which would be handed back still as strings.
int b_to_number(lua_State* L) {
    if (lua_type(L, 1) == LUA_TNUMBER) { lua_pushvalue(L, 1); return 1; }
    const char* s = luaL_checkstring(L, 1);
    // lua_stringtonumber pushes the converted number and returns non-zero on
    // success; on failure it pushes nothing.
    if (lua_stringtonumber(L, s) != 0) return 1;
    char* end = nullptr;
    double v = std::strtod(s, &end);
    if (end == s) { lua_pushnil(L); return 1; }
    lua_pushnumber(L, v);
    return 1;
}
// fn.to_string(v): string form of v as produced by luaL_tolstring.
int b_to_string(lua_State* L) {
    // The converted string is left on top of the stack as the return value.
    static_cast<void>(luaL_tolstring(L, 1, nullptr));
    return 1;
}
// fn.to_bool(v): booleans pass through; otherwise true only for the strings
// "true" and "1". A missing or nil argument is treated as "".
int b_to_bool(lua_State* L) {
    if (lua_isboolean(L, 1)) {
        lua_pushvalue(L, 1);
        return 1;
    }
    const char* text = luaL_optstring(L, 1, "");
    int truthy = 0;
    if (std::strcmp(text, "true") == 0) truthy = 1;
    else if (std::strcmp(text, "1") == 0) truthy = 1;
    lua_pushboolean(L, truthy);
    return 1;
}
// fn.is_null(v): true when v is nil.
int b_is_null(lua_State* L) {
    const int is_nil = lua_isnil(L, 1) ? 1 : 0;
    lua_pushboolean(L, is_nil);
    return 1;
}
// fn.is_empty(v): true for nil and for the empty string.
int b_is_empty(lua_State* L) {
    int empty = 1;
    if (!lua_isnil(L, 1)) {
        const char* text = luaL_optstring(L, 1, "");
        empty = (text[0] == '\0') ? 1 : 0;
    }
    lua_pushboolean(L, empty);
    return 1;
}
// fn.coalesce(...): first argument that is not nil, or nil when all are.
int b_coalesce(lua_State* L) {
    const int argc = lua_gettop(L);
    int pick = 1;
    while (pick <= argc && lua_isnil(L, pick)) ++pick;
    if (pick <= argc) lua_pushvalue(L, pick);
    else lua_pushnil(L);
    return 1;
}
// fn.parse_date(s): parses a leading "Y-M-D" into {year=, month=, day=}.
// Returns nil when s is shorter than 10 characters or the three integers
// cannot be read.
int b_parse_date(lua_State* L) {
    const char* text = luaL_checkstring(L, 1);
    int year = 0, month = 0, day = 0;
    const bool parsed = std::strlen(text) >= 10 &&
                        std::sscanf(text, "%d-%d-%d", &year, &month, &day) == 3;
    if (!parsed) {
        lua_pushnil(L);
        return 1;
    }
    const struct { const char* key; int value; } fields[] = {
        {"year", year}, {"month", month}, {"day", day},
    };
    lua_createtable(L, 0, 3);
    for (const auto& field : fields) {
        lua_pushinteger(L, field.value);
        lua_setfield(L, -2, field.key);
    }
    return 1;
}
// fn.year(s): leading integer of s, or nil when s does not start with one.
int b_year(lua_State* L) {
    const char* text = luaL_checkstring(L, 1);
    int year = 0;
    if (std::sscanf(text, "%d", &year) == 1) lua_pushinteger(L, year);
    else lua_pushnil(L);
    return 1;
}
// fn.month(s): second integer of a leading "Y-M", or nil when it cannot be read.
int b_month(lua_State* L) {
    const char* text = luaL_checkstring(L, 1);
    int year = 0, month = 0;
    if (std::sscanf(text, "%d-%d", &year, &month) == 2) lua_pushinteger(L, month);
    else lua_pushnil(L);
    return 1;
}
// fn.day(s): third integer of a leading "Y-M-D", or nil when it cannot be read.
int b_day(lua_State* L) {
    const char* text = luaL_checkstring(L, 1);
    int year = 0, month = 0, day = 0;
    if (std::sscanf(text, "%d-%d-%d", &year, &month, &day) == 3) lua_pushinteger(L, day);
    else lua_pushnil(L);
    return 1;
}
// Removes the globals that give access to files, module loading, dynamic code
// loading and the debug library, and replaces `os` with a table exposing only
// date, time, difftime and clock.
void apply_sandbox(lua_State* L) {
    const char* const removed[] = {"io", "require", "loadfile", "dofile",
                                   "load", "package", "debug"};
    for (const char* name : removed) {
        lua_pushnil(L);
        lua_setglobal(L, name);
    }

    const char* const os_allowed[] = {"date", "time", "difftime", "clock"};
    lua_getglobal(L, "os");                 // stack: os
    if (lua_istable(L, -1)) {
        lua_createtable(L, 0, 4);           // stack: os, reduced
        for (const char* name : os_allowed) {
            lua_getfield(L, -2, name);      // stack: os, reduced, os[name]
            lua_setfield(L, -2, name);      // reduced[name] = os[name]
        }
        lua_setglobal(L, "os");             // stack: os
    }
    lua_pop(L, 1);                          // drop the original os value
}
// Publishes the builtins as fields of a new global table `fn`.
void register_builtins(lua_State* L) {
    const struct { const char* name; lua_CFunction fn; } builtins[] = {
        {"upper", b_upper},
        {"lower", b_lower},
        {"length", b_length},
        {"substring", b_substring},
        {"contains", b_contains},
        {"starts_with", b_starts_with},
        {"ends_with", b_ends_with},
        {"replace", b_replace},
        {"trim", b_trim},
        {"concat", b_concat},
        {"to_number", b_to_number},
        {"to_string", b_to_string},
        {"to_bool", b_to_bool},
        {"is_null", b_is_null},
        {"is_empty", b_is_empty},
        {"coalesce", b_coalesce},
        {"parse_date", b_parse_date},
        {"year", b_year},
        {"month", b_month},
        {"day", b_day},
    };
    lua_createtable(L, 0, 24);
    for (const auto& entry : builtins) {
        lua_pushcfunction(L, entry.fn);
        lua_setfield(L, -2, entry.name);
    }
    lua_setglobal(L, "fn");
}
// Registers the "fn_row_meta" metatable whose __index is row_index;
// eval_internal attaches it to the row userdata it passes to each formula.
void install_row_metatable(lua_State* L) {
    luaL_newmetatable(L, "fn_row_meta");
    const int meta = lua_gettop(L);
    lua_pushcfunction(L, row_index);
    lua_setfield(L, meta, "__index");
    lua_settop(L, meta - 1);  // leave the stack as it was on entry
}
// ---------------------------------------------------------------------------
// Preprocesador: [col] -> row["col"] respetando strings y comentarios.
// Auto-prepend `return` si la formula es expresion suelta.
// ---------------------------------------------------------------------------
// True for bytes that may begin a bracketed column name: ASCII letters,
// underscore, and any byte >= 0x80 (so UTF-8 encoded names are accepted).
bool ident_start(unsigned char c) {
    if (c >= 0x80 || c == '_') return true;
    const bool upper = c >= 'A' && c <= 'Z';
    const bool lower = c >= 'a' && c <= 'z';
    return upper || lower;
}
// True for bytes allowed after the first one inside [name]. Besides
// identifier characters and digits this accepts ' ' (for "col with space")
// and '.' (reserved for a future `alias.col` form after joins).
bool ident_cont(unsigned char c) {
    if (ident_start(c)) return true;
    if (c >= '0' && c <= '9') return true;
    return c == ' ' || c == '.';
}
// True for bytes that continue a word; used to find the boundary after a Lua
// keyword. Unlike ident_cont this does NOT accept space or '.'.
bool word_char(unsigned char c) {
    if (c >= 0x80 || c == '_') return true;
    if (c >= '0' && c <= '9') return true;
    if (c >= 'A' && c <= 'Z') return true;
    return c >= 'a' && c <= 'z';
}
// True when keyword `kw` occurs in `s` at offset `i` and is followed by the
// end of the string or by a byte that is not a word character.
bool kw_at(const std::string& s, size_t i, const char* kw) {
    const size_t len = std::strlen(kw);
    const size_t end = i + len;
    if (end > s.size() || s.compare(i, len, kw) != 0) return false;
    return end == s.size() || !word_char(static_cast<unsigned char>(s[end]));
}
// Decides whether `return` must be prepended to a formula. Leading whitespace
// and comments are skipped; the answer is false when nothing is left or when
// the first token is one of the statement keywords listed below.
bool needs_auto_return(const std::string& body) {
    const size_t n = body.size();
    size_t pos = 0;
    for (;;) {
        while (pos < n && (body[pos] == ' ' || body[pos] == '\t' ||
                           body[pos] == '\n' || body[pos] == '\r')) {
            ++pos;
        }
        const bool at_comment = pos + 1 < n && body[pos] == '-' && body[pos + 1] == '-';
        if (!at_comment) break;
        if (pos + 3 < n && body[pos + 2] == '[' && body[pos + 3] == '[') {
            // Long comment: skip to just past the closing ]], or to the end
            // of the text when it is unterminated.
            size_t close = pos + 4;
            while (close + 1 < n && !(body[close] == ']' && body[close + 1] == ']')) ++close;
            pos = (close + 1 < n) ? close + 2 : n;
        } else {
            // Short comment: skip to the end of the line.
            while (pos < n && body[pos] != '\n') ++pos;
        }
    }
    if (pos >= n) return false;

    const char* const statement_keywords[] = {"return", "if", "for", "while",
                                              "do", "local", "repeat", "function"};
    for (const char* kw : statement_keywords) {
        if (kw_at(body, pos, kw)) return false;
    }
    return true;
}
// Rewrites bracket column references `[name]` into `row["name"]`.
// Quoted strings, `--` comments (short and `--[[ ]]`) and `[[ ]]` long
// strings are copied through unchanged, so brackets inside them are never
// rewritten. A reference is rewritten only when the byte after `[` satisfies
// ident_start, every following byte up to `]` satisfies ident_cont, and the
// `]` is on the same line; trailing spaces in the name are dropped.
// NOTE(review): level-N long brackets such as [==[ ... ]==] are not
// recognised by this scanner; only the `[[` form is.
std::string brackets_pass(const std::string& src) {
std::string out;
out.reserve(src.size() + 16);
size_t i = 0;
while (i < src.size()) {
char c = src[i];
// quoted string: copy through the closing quote, stopping at end of line
if (c == '"' || c == '\'') {
char q = c;
out += c; ++i;
while (i < src.size()) {
char d = src[i];
out += d; ++i;
// backslash escape: copy the escaped byte too so an escaped quote
// does not end the string
if (d == '\\' && i < src.size()) { out += src[i++]; continue; }
if (d == q) break;
if (d == '\n') break;
}
continue;
}
// short / long comment
if (c == '-' && i + 1 < src.size() && src[i+1] == '-') {
// long: --[[ ... ]]
if (i + 3 < src.size() && src[i+2] == '[' && src[i+3] == '[') {
out.append(src, i, 4); i += 4;
while (i + 1 < src.size() && !(src[i] == ']' && src[i+1] == ']')) {
out += src[i++];
}
// copy the closing ]] when it was found
if (i + 1 < src.size()) { out += src[i++]; out += src[i++]; }
continue;
}
// short: copy up to (not including) the newline
while (i < src.size() && src[i] != '\n') { out += src[i++]; }
continue;
}
// long string [[ ... ]]
if (c == '[' && i + 1 < src.size() && src[i+1] == '[') {
out.append(src, i, 2); i += 2;
while (i + 1 < src.size() && !(src[i] == ']' && src[i+1] == ']')) {
out += src[i++];
}
if (i + 1 < src.size()) { out += src[i++]; out += src[i++]; }
continue;
}
// bracket col-ref [name]
if (c == '[') {
// peek if next is valid ident_start
if (i + 1 < src.size() && ident_start((unsigned char)src[i+1])) {
size_t j = i + 1;
// scan to the closing ']' on this line; npos marks "not a column ref"
while (j < src.size() && src[j] != ']' && src[j] != '\n') {
if (!ident_cont((unsigned char)src[j])) { j = std::string::npos; break; }
++j;
}
if (j != std::string::npos && j < src.size() && src[j] == ']') {
std::string name(src, i + 1, j - i - 1);
// trim trailing space
while (!name.empty() && name.back() == ' ') name.pop_back();
out += "row[\"";
out += name;
out += "\"]";
i = j + 1;
continue;
}
}
}
// anything else (including a '[' that is not a column ref) is copied as is
out += c;
++i;
}
return out;
}
} // anon
// Turns a user formula into Lua source: rewrites [col] references and
// prepends "return " when the formula is a bare expression.
std::string preprocess(const std::string& body) {
    std::string rewritten = brackets_pass(body);
    if (!needs_auto_return(rewritten)) return rewritten;
    rewritten.insert(0, "return ");
    return rewritten;
}
namespace {
// Calls compiled formula `id` with a fresh row userdata bound to `ctx` and
// returns its result as text. A nil result yields "". On an invalid handle
// or a runtime error it returns "" and writes the message to err_out when
// that is non-null.
std::string eval_internal(Engine* e, int id, const RowCtx& ctx, std::string* err_out) {
    const bool usable = e != nullptr && e->L != nullptr && id >= 0;
    if (!usable) {
        if (err_out) *err_out = "invalid handle";
        return "";
    }
    lua_State* const L = e->L;

    // ctx stays on the stack for the duration of the call so row_index can
    // find it through current_ctx().
    e->ctx_stack.push_back(const_cast<RowCtx*>(&ctx));
    lua_rawgeti(L, LUA_REGISTRYINDEX, id);   // the compiled function
    lua_newuserdata(L, 1);                   // its `row` argument
    luaL_setmetatable(L, "fn_row_meta");
    const int status = lua_pcall(L, 1, 1, 0);
    e->ctx_stack.pop_back();

    std::string result;
    if (status != LUA_OK) {
        if (err_out) {
            const char* msg = lua_tostring(L, -1);
            *err_out = msg ? msg : "runtime error";
        }
    } else if (!lua_isnil(L, -1)) {
        size_t len = 0;
        const char* text = luaL_tolstring(L, -1, &len);  // pushes a string copy
        result.assign(text, len);
        lua_pop(L, 1);                                   // drop that copy
    }
    lua_pop(L, 1);  // drop the call result or the error message
    return result;
}
} // anon
// Returns the process-wide engine, creating it on first use: opens the
// standard libraries, applies the sandbox, registers the fn.* builtins and
// installs the row metatable.
// Returns nullptr when the Lua state cannot be allocated; nothing is cached
// in that case, so a later call tries again.
Engine* get() {
    if (g_engine) return g_engine;
    Engine* fresh = new Engine();
    fresh->L = luaL_newstate();
    if (!fresh->L) {
        // luaL_newstate reports allocation failure with NULL; handing that
        // to luaL_openlibs would crash.
        delete fresh;
        return nullptr;
    }
    luaL_openlibs(fresh->L);
    // Stash the owner in the state's extra space for engine_from_state().
    *static_cast<Engine**>(lua_getextraspace(fresh->L)) = fresh;
    apply_sandbox(fresh->L);
    register_builtins(fresh->L);
    install_row_metatable(fresh->L);
    g_engine = fresh;
    return g_engine;
}
// Closes the Lua state and destroys the engine; a no-op when none exists.
// Ids obtained from compile() belong to the closed state.
void shutdown() {
    if (g_engine != nullptr) {
        lua_close(g_engine->L);
        delete g_engine;
        g_engine = nullptr;
    }
}
// Compiles a formula body into a Lua function taking `row` and stores it in
// the registry.
// Returns the registry reference on success, or -1 on failure with the
// message written to err_out when that is non-null.
int compile(Engine* e, const std::string& body, std::string* err_out) {
    if (e == nullptr || e->L == nullptr) {
        if (err_out) *err_out = "engine null";
        return -1;
    }
    lua_State* const L = e->L;

    // Reports the value on top of the stack (or `fallback` when it has no
    // string form) as the error, pops it and yields the failure code.
    const auto fail_with_top = [&](const char* fallback) {
        if (err_out) {
            const char* msg = lua_tostring(L, -1);
            *err_out = msg ? msg : fallback;
        }
        lua_pop(L, 1);
        return -1;
    };

    std::string chunk = "return function(row)\n";
    chunk += preprocess(body);
    chunk += "\nend";

    // Mode "t" accepts text chunks only.
    if (luaL_loadbufferx(L, chunk.data(), chunk.size(), "formula", "t") != LUA_OK) {
        return fail_with_top("parse error");
    }
    // Running the chunk leaves the formula function on the stack.
    if (lua_pcall(L, 0, 1, 0) != LUA_OK) {
        return fail_with_top("compile error");
    }
    if (!lua_isfunction(L, -1)) {
        if (err_out) *err_out = "formula did not produce a function";
        lua_pop(L, 1);
        return -1;
    }
    return luaL_ref(L, LUA_REGISTRYINDEX);
}
// Drops the registry reference of a compiled formula. Null engines and
// negative ids are ignored.
void release(Engine* e, int id) {
    const bool valid = e != nullptr && e->L != nullptr && id >= 0;
    if (valid) luaL_unref(e->L, LUA_REGISTRYINDEX, id);
}
// Public entry point for evaluating a compiled formula on one row; forwards
// to eval_internal, which row_index also uses for nested derived columns.
std::string eval(Engine* e, int id, const RowCtx& ctx, std::string* err_out) {
    std::string value = eval_internal(e, id, ctx, err_out);
    return value;
}
// Underlying lua_State of the engine (created on demand through get()), or
// nullptr when no engine is available.
lua_State* raw_state() {
    if (Engine* eng = get()) return eng->L;
    return nullptr;
}
} // namespace lua_engine
+72
View File
@@ -0,0 +1,72 @@
// lua_engine — Lua 5.4 sandbox para evaluar formulas de columnas.
//
// Extraido del playground tables (issue 0081-D).
//
// Features:
// - Sandbox: io/require/dofile/loadfile/load/package/debug fuera.
// os reducido a date/time/difftime/clock.
// - Builtins fn.* (~20 funciones de string/tipo/fecha).
// - Sintaxis [col_name] preprocesada a row["col_name"].
// - Auto-`return` si la formula es expresion sin keyword inicial.
// - Type-aware push: columna Int/Float -> number, Bool -> boolean,
// String/Date/Json/Auto -> string (heuristica si Auto).
// - Llamadas recursivas entre derived cols; ciclos cortados con nil.
//
// Thread-safety: NOT thread-safe. get() hands out a single process-wide
// Engine, so every call must come from one thread or be serialized by the
// caller. See ## Gotchas in lua_engine.md.
#pragma once
#include "core/data_table_types.h"
#include <string>
#include <unordered_map>
#include <vector>
// Forward declaration del C struct de Lua (definido en lua.h).
struct lua_State;
namespace lua_engine {
struct Engine;
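// Engine lifecycle. get() lazily creates one process-wide instance (a plain
// global, not thread_local); shutdown() destroys it, and a later get() builds
// a fresh one.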
Engine* get();
void shutdown();
// Compila el body de la formula y devuelve un id de referencia (>= 0).
// Retorna -1 en error; err_out recibe el mensaje si no es nullptr.
int compile(Engine* e, const std::string& body, std::string* err_out);
// Libera la referencia compilada.
void release(Engine* e, int id);
// Row context passed to eval(). Every pointer is borrowed: the struct owns
// nothing.
struct RowCtx {
const char* const* cells = nullptr; // row-major flat array; a cell is read as cells[row * orig_cols + col]
int orig_cols = 0; // number of original columns (row stride of `cells`)
int row = 0; // index of the row being evaluated
const std::vector<std::string>* header_names = nullptr; // column names (not consulted by the row lookup in lua_engine.cpp)
const std::unordered_map<std::string,int>* name_to_col = nullptr; // column name -> original column index
// Declared types of the original columns. nullptr -> Auto heuristic.
const data_table::ColumnType* types_orig = nullptr;
int n_types_orig = 0; // element count of types_orig; columns beyond it are treated as Auto
// Derived columns + lookup by name (retyped columns and formulas).
const std::vector<data_table::DerivedColumn>* derived = nullptr;
const std::unordered_map<std::string,int>* derived_name_to_idx = nullptr; // derived column name -> index into `derived`
};
// Evalua la formula compilada sobre el contexto de fila dado.
// Devuelve el resultado como string. En error devuelve "" y escribe en err_out.
std::string eval(Engine* e, int id, const RowCtx& ctx, std::string* err_out);
// Helper expuesto para tests: preprocesa `[col]` -> `row["col"]`
// respetando strings y comentarios. Aplica auto-return si es expresion.
std::string preprocess(const std::string& body);
// Acceso al lua_State subyacente. Uso restringido: tql.cpp parsea chunks
// (return { ... }) y recorre tablas. NO usar para nada que rompa el sandbox.
::lua_State* raw_state();
} // namespace lua_engine
+100
View File
@@ -0,0 +1,100 @@
---
name: lua_engine
kind: function
lang: cpp
domain: core
version: "1.0.0"
purity: impure
signature: "lua_engine::Engine* lua_engine::get(); void lua_engine::shutdown(); int lua_engine::compile(Engine*, const std::string& body, std::string* err_out); void lua_engine::release(Engine*, int id); std::string lua_engine::eval(Engine*, int id, const RowCtx& ctx, std::string* err_out); std::string lua_engine::preprocess(const std::string& body)"
description: "Motor Lua 5.4 sandbox para evaluar formulas de columnas. Sandbox quita io/require/dofile/loadfile/load/package/debug; os reducido a date/time/difftime/clock. Builtins fn.* (~20 funciones de string/tipo/fecha). Preprocesa [col_name] -> row[\"col_name\"]. Auto-return si la formula es expresion. Type-aware push segun ColumnType declarado."
tags: [tables, lua, sandbox, formula, tql, cpp-tables]
uses_functions: []
uses_types: ["data_table_types_cpp_core"]
returns: []
returns_optional: true
error_type: "error_go_core"
imports: [lua.h, lualib.h, lauxlib.h]
tested: true
tests:
- "eval expr simple"
- "eval con vars via RowCtx"
- "error en expr invalida"
- "sandbox bloquea io.open"
- "preprocess convierte [col] a row[\"col\"]"
- "fn.* builtins disponibles"
- "shutdown y reinicio"
test_file_path: "cpp/tests/test_lua_engine.cpp"
file_path: "cpp/functions/core/lua_engine.cpp"
params:
- name: body
desc: "Cuerpo de la formula Lua. Expresiones sueltas reciben auto-return. Usa [col_name] para referenciar columnas del RowCtx."
- name: ctx
desc: "RowCtx con puntero al array cells row-major, cabeceras, mapa nombre->columna, tipos declarados y columnas derivadas. nullptr en campos opcionales activa heuristica Auto."
output: "std::string con el resultado de la formula convertido a string via luaL_tolstring. Vacio + err_out poblado si hay error de compilacion o runtime."
notes: "Extraido del playground tables (issue 0081-D). Enlaza contra lua54 (vendored en cpp/vendor/lua). Linker: target_link_libraries(tu_target PRIVATE lua54). Origen: cpp/apps/primitives_gallery/playground/tables/lua_engine.{h,cpp}."
---
# lua_engine
Motor Lua 5.4 sandbox para evaluar formulas de columnas en el stack TQL. Diseñado para que `data_table` (y en el futuro `data_table_cpp_viz`) compile y evalúe expresiones definidas por el usuario sin acceso al filesystem ni a la red.
## API
```cpp
#include "core/lua_engine.h"
// Ciclo de vida (singleton por proceso/hilo)
lua_engine::Engine* e = lua_engine::get();
lua_engine::shutdown(); // libera el lua_State; get() crea uno nuevo si se llama de nuevo
// Compilar una formula (una vez por formula, N evals)
std::string err;
int id = lua_engine::compile(e, "[price] * [qty]", &err); // id >= 0 si ok
if (id < 0) { /* err tiene el mensaje */ }
// Evaluar sobre una fila
lua_engine::RowCtx ctx = { cells, ncols, row_index, &headers, &name_to_col };
std::string result = lua_engine::eval(e, id, ctx, &err);
// Liberar cuando ya no se necesite la formula
lua_engine::release(e, id);
```
## Ejemplo
```cpp
#include "core/lua_engine.h"
// Formula: multiplica price por qty
lua_engine::Engine* e = lua_engine::get();
std::string err;
int id = lua_engine::compile(e, "[price] * [qty]", &err);
// Tabla: 1 fila, cols = {price="10.5", qty="3"}
const char* cells[] = { "10.5", "3" };
std::vector<std::string> headers = { "price", "qty" };
std::unordered_map<std::string,int> name_to_col = { {"price",0}, {"qty",1} };
data_table::ColumnType types[] = { data_table::ColumnType::Float, data_table::ColumnType::Float };
lua_engine::RowCtx ctx;
ctx.cells = cells; ctx.orig_cols = 2; ctx.row = 0;
ctx.header_names = &headers; ctx.name_to_col = &name_to_col;
ctx.types_orig = types; ctx.n_types_orig = 2;
std::string result = lua_engine::eval(e, id, ctx, &err);
// result == "31.5"
lua_engine::release(e, id);
```
## Cuando usarla
Cuando necesites evaluar expresiones de usuario sobre filas de datos tabulares con acceso a columnas por nombre. Antes de invocar `compute_stage` con `DerivedColumn.formula` poblado. Si necesitas un pipeline TQL con formulas, `tql.cpp` usa esta funcion internamente.
## Gotchas
- **Estado interno `lua_State`**: el motor es un singleton con estado mutable. Compile/eval/release modifican el estado del `lua_State`. No es re-entrante por diseño.
- **Sandbox**: `io`, `require`, `loadfile`, `dofile`, `load`, `package`, `debug` son `nil` en el ambiente. `os` solo expone `date`, `time`, `difftime`, `clock`. Intentar `io.open(...)` produce un runtime error (nil index).
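- **Thread-safety**: NO thread-safe. `get()` devuelve un único `Engine` global por proceso (no es `thread_local`), así que todas las llamadas deben hacerse desde un solo hilo o serializarse externamente. La API actual no permite crear motores adicionales; si necesitas evaluar formulas en paralelo hay que extenderla para construir un `Engine` (con su propio `luaL_newstate`) por hilo.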
- **Linker**: requiere `lua54` (target CMake definido por `cpp/vendor/lua/CMakeLists.txt`). Añadir `target_link_libraries(tu_target PRIVATE lua54)` al CMakeLists de la app/test.
- **Ciclos en derived cols**: detectados con un stack `visiting_derived`; el ciclo retorna `nil` en vez de recursion infinita.
- **Auto-return**: si la formula no comienza con `return`, `if`, `for`, `while`, `do`, `local`, `repeat` o `function`, se antepone `return` automaticamente. Permite escribir `[price] * 1.21` sin `return`.
+68
View File
@@ -0,0 +1,68 @@
// tql_apply — Parse TQL Lua text and populate data_table::State.
// TQL (Table Query Language): canonical Lua chunk format for State round-trips.
// Promoted from primitives_gallery playground to registry, issue 0081-C.
//
// Depends on: Lua 5.4 C API (vendored at cpp/vendor/lua/), tql_helpers.h.
// Does NOT call lua_engine — formulas are stored verbatim in DerivedColumn.formula.
// Formula evaluation (Lua engine) is the caller's responsibility.
//
// Pure in the registry sense: no global state, no I/O, no side effects beyond
// populating the output State. The only external requirement is that Lua 5.4
// is available to parse the text chunk.
#pragma once
#include "core/data_table_types.h"
#include <string>
#include <vector>
namespace tql {
// Result of tql::apply(). Check `ok` before reading `state`.
struct ApplyResult {
bool ok = false; // true when the chunk was parsed successfully
std::string error; // non-empty on hard failure
std::vector<std::string> warnings; // soft issues (unknown col, etc.); may be non-empty even when ok=true
data_table::State state; // populated on ok=true
};
// apply — Parse a TQL Lua chunk and return the resulting State.
//
// Parameters:
// lua_text — the TQL Lua chunk (output of tql_emit, or hand-written).
// available_headers — original column names used to validate filter/sort col refs.
// Pass empty to skip col-name validation (warnings only).
//
// Returns ApplyResult:
// ok=true — state populated; warnings may still be non-empty (unknown cols, etc.)
// ok=false — error is set; state is partially populated / undefined.
//
// DerivedColumn.formula is stored verbatim; lua_id is set to -1 (not compiled).
// The caller must compile formulas via a Lua engine after receiving the result.
ApplyResult apply(const std::string& lua_text,
const std::vector<std::string>& available_headers);
// apply (extended overload) — Playground-compatible signature used by data_table render.
//
// Wraps the base apply(), writes the parsed State into `st` in-place, and returns bool.
// Parameters cells/rows/orig_cols are accepted for API compatibility with the playground
// caller but are not used by the parser itself (they are provided for future validation).
//
// lua_text — TQL Lua chunk.
// st — output State (populated on ok=true).
// orig_headers — column names for validation (may be empty).
// orig_types — column types matching orig_headers (unused by parser, kept for compat).
// cells — row-major cell array (unused by parser; accepted for caller compat).
// rows — row count (unused by parser; accepted for caller compat).
// orig_cols — number of original columns (unused by parser; accepted for compat).
// err — if non-null, receives the error message on failure.
//
// Returns true on success, false on parse error.
bool apply(const std::string& lua_text,
data_table::State& st,
const std::vector<std::string>& orig_headers,
const std::vector<data_table::ColumnType>& orig_types,
const char* const* cells,
int rows,
int orig_cols,
std::string* err);
} // namespace tql
+94
View File
@@ -0,0 +1,94 @@
---
name: tql_apply
kind: function
lang: cpp
domain: core
version: "1.1.0"
purity: pure
signature: "tql::ApplyResult tql::apply(const std::string& lua_text, const std::vector<std::string>& available_headers)"
description: "Parses a TQL v1 Lua chunk and returns a populated data_table::State. Inverse of tql_emit. Uses Lua 5.4 C API directly (not lua_engine) — formulas are stored verbatim in DerivedColumn.formula with lua_id=-1; the caller compiles them via a Lua engine."
tags: [tables, tql, parser, cpp-tables]
uses_functions: []
uses_types:
- data_table_types_cpp_core
returns: []
returns_optional: false
error_type: ""
imports:
- "core/tql_apply.h"
- "core/tql_helpers.h"
tested: true
tests:
- "parse minimal TQL chunk"
- "parse display=bar"
- "filter parsing with known header"
- "unknown column generates warning"
- "sort parsing"
- "aggregation stage parsing"
- "expression stored verbatim lua_id=-1"
- "views parsing extra viz panel"
- "join parsing"
- "version mismatch returns error"
- "invalid Lua syntax returns error"
- "roundtrip emit->apply->emit preserves state"
- "column visibility and order preserved through roundtrip"
- "color rule roundtrip"
test_file_path: "cpp/functions/core/tql_apply_test.cpp"
file_path: "cpp/functions/core/tql_apply.cpp"
params:
- name: lua_text
desc: "TQL Lua chunk (output of tql_emit or hand-written). Must return a table with version=1."
- name: available_headers
desc: "Original column names for filter/breakout/sort col validation. Pass empty vector to skip validation (warnings will not be generated for unknown cols)."
output: "ApplyResult with: ok=true and populated state on success; ok=false and error string on hard failure (bad Lua syntax, unsupported version). Soft issues (unknown cols, unrecognised tokens) go into warnings without setting ok=false."
---
## Ejemplo
```cpp
#include "core/tql_apply.h"
const char* tql_text = R"(
return {
version = 1,
display = "bar",
stages = {
{ filter = { {"=", "status", "active"} } },
{ breakout = {"dept"}, aggregation = { {"sum", "salary"} } },
},
columns = {},
views = { {display = "bar", x_col = "dept", y_cols = {"sum_salary"}} },
visualization_settings = {},
}
)";
std::vector<std::string> headers = {"status", "dept", "salary"};
tql::ApplyResult res = tql::apply(tql_text, headers);
if (!res.ok) {
printf("TQL parse error: %s\n", res.error.c_str());
} else {
for (auto& w : res.warnings) printf("warning: %s\n", w.c_str());
// res.state is ready — compile formulas with lua_engine if needed
// for (auto& d : res.state.stages[0].derived) {
// if (!d.formula.empty()) d.lua_id = lua_engine::compile(...);
// }
}
```
## Cuando usarla
Cuando necesites restaurar un State desde texto TQL (base de datos, portapapeles, preset guardado). Siempre en pareja con `tql_emit`. Si `available_headers` está vacío no habrá validación de nombres de columna — útil cuando los headers no se conocen en el momento del parse.
## Gotchas
- Requiere Lua 5.4 vendored (`cpp/vendor/lua/`). El consumidor (CMakeLists.txt) debe linkear `lua_static`.
- Abre y cierra su propio `lua_State` por llamada (no comparte estado global). Thread-safe siempre que cada hilo use su propia instancia.
- `DerivedColumn.lua_id` siempre sale como `-1`. El caller debe compilar fórmulas con `lua_engine::compile()` tras recibir el resultado.
- Columnas no encontradas en `available_headers` van a `warnings` (ok sigue siendo true). Sólo errores de sintaxis Lua o version != 1 ponen ok=false.
- Promoted from `cpp/apps/primitives_gallery/playground/tables/tql.cpp`, issue 0081-C.
- v1.1.0 añade sobrecarga extendida `bool apply(lua_text, State&, headers, types, cells, rows, orig_cols, err*)` — firma compatible con el playground para uso desde `data_table::render`. `cells/rows/orig_cols` se aceptan para compatibilidad de API pero no son usados por el parser.
## Capability growth log
v1.1.0 (2026-05-15) — añade sobrecarga extendida bool-returning (playground-compat). Resuelve deuda técnica de data_table_cpp_viz. Issue 0081-I.
+32
View File
@@ -0,0 +1,32 @@
// tql_emit — Pure serialization of data_table::State to TQL Lua text.
// TQL (Table Query Language): canonical Lua chunk format for State round-trips.
// Promoted from primitives_gallery playground to registry, issue 0081-C.
//
// No ImGui, no I/O, no Lua runtime. Pure string generation.
// Depends only on data_table_types.h + tql_helpers.h.
#pragma once
#include "core/data_table_types.h"
#include <string>
#include <vector>
namespace tql {
// emit — Serialize a State to a complete Lua chunk (TQL v1).
//
// Parameters:
// state — the current view/pipeline state to serialize.
// headers — original column names (size = orig_cols). Derived cols are
// taken automatically from state.stages[0].derived.
// types — column types for the original columns. Missing entries default
// to ColumnType::Auto.
//
// Returns a UTF-8 Lua chunk that starts with a comment header and ends with
// "return { version=1, display=..., stages=..., columns=..., views=... }".
// The output is self-contained and parseable with tql_apply without any extra
// context (modulo formula evaluation, which requires a Lua engine).
std::string emit(const data_table::State& state,
const std::vector<std::string>& headers,
const std::vector<data_table::ColumnType>& types);
} // namespace tql
+73
View File
@@ -0,0 +1,73 @@
---
name: tql_emit
kind: function
lang: cpp
domain: core
version: "1.0.0"
purity: pure
signature: "std::string tql::emit(const data_table::State& state, const std::vector<std::string>& headers, const std::vector<data_table::ColumnType>& types)"
description: "Serializes a data_table::State to a complete TQL v1 Lua chunk. TQL (Table Query Language) is the canonical textual format for State round-trips: emit produces text, tql_apply parses it back."
tags: [tables, tql, serialization, cpp-tables]
uses_functions: []
uses_types:
- data_table_types_cpp_core
returns: []
returns_optional: false
error_type: ""
imports:
- "core/tql_emit.h"
- "core/tql_helpers.h"
tested: true
tests:
- "emit empty state produces valid header"
- "emit single column"
- "emit state with filter in stage 0"
- "emit state with sort"
- "emit stage 1 with breakout + aggregation"
- "emit with color rule"
- "emit with viz panel"
- "emit join"
- "lua_string_literal escaping"
- "color roundtrip"
test_file_path: "cpp/functions/core/tql_emit_test.cpp"
file_path: "cpp/functions/core/tql_emit.cpp"
params:
- name: state
desc: "Current view/pipeline State to serialize (stages, col_visible, col_order, color_rules, joins, extra_panels)."
- name: headers
desc: "Original column names (orig_cols count). Derived columns are taken from state.stages[0].derived automatically."
- name: types
desc: "Column types for original columns. Missing entries default to ColumnType::Auto."
output: "UTF-8 Lua chunk starting with a comment header and ending with return { version=1, display, main_source?, joins?, stages, columns, views, visualization_settings }. Self-contained and parseable with tql_apply."
---
## Ejemplo
```cpp
#include "core/tql_emit.h"
data_table::State st;
st.stages.push_back(data_table::Stage{});
st.stages[0].filters.push_back({0, data_table::Op::Eq, "Alice"});
st.display = data_table::ViewMode::Table;
std::vector<std::string> headers = {"name", "age"};
std::vector<data_table::ColumnType> types = {
data_table::ColumnType::String,
data_table::ColumnType::Int
};
std::string lua_chunk = tql::emit(st, headers, types);
// lua_chunk starts with "-- TQL v1 ..." and is parseable by tql_apply
```
## Cuando usarla
Cuando necesites persistir o serializar el estado de una tabla TQL (filtros, sorts, breakouts, viz config) a texto. Úsala antes de guardar en BD, copiar al portapapeles, o enviar al servidor. Para recuperar el State usa `tql_apply`.
## Notas
- Función pura: no usa I/O, no Lua runtime, no ImGui.
- Los DerivedColumn con formula no vacía se emiten en la sección `expressions`. Los de `source_col >= 0` (retipados puros) se emiten como columnas en la sección `columns` pero sin entrada en `expressions`.
- El formato TQL es texto Lua que usa `return { ... }` como raíz — parseable con `luaL_loadbufferx` + `lua_pcall`.
- Promoted from `cpp/apps/primitives_gallery/playground/tables/tql.cpp`, issue 0081-C.
+294
View File
@@ -0,0 +1,294 @@
// tql_helpers.cpp — Pure token helpers for TQL. No external deps.
// See tql_helpers.h for documentation.
#include "core/tql_helpers.h"
#include <algorithm>
#include <cstdio>
#include <cstring>
namespace data_table {
// ----------------------------------------------------------------------------
// Op
// ----------------------------------------------------------------------------
// Maps a filter operator to its TQL wire token (inverse of op_from_label).
// Values outside the enum fall back to "=".
const char* op_label(Op o) {
    const char* token = "=";
    switch (o) {
        case Op::Eq:          token = "=";         break;
        case Op::Neq:         token = "!=";        break;
        case Op::Gt:          token = ">";         break;
        case Op::Gte:         token = ">=";        break;
        case Op::Lt:          token = "<";         break;
        case Op::Lte:         token = "<=";        break;
        case Op::Contains:    token = "contains";  break;
        case Op::NotContains: token = "!contains"; break;
        case Op::StartsWith:  token = "starts";    break;
        case Op::EndsWith:    token = "ends";      break;
    }
    return token;
}
// Parses a TQL operator token back into an Op.
// A null pointer or an unrecognised token yields Op::Eq.
Op op_from_label(const char* s) {
    struct LabelOp { const char* label; Op op; };
    static const LabelOp kLabels[] = {
        {"=",         Op::Eq},
        {"!=",        Op::Neq},
        {">",         Op::Gt},
        {">=",        Op::Gte},
        {"<",         Op::Lt},
        {"<=",        Op::Lte},
        {"contains",  Op::Contains},
        {"!contains", Op::NotContains},
        {"starts",    Op::StartsWith},
        {"ends",      Op::EndsWith},
    };
    if (s == nullptr) return Op::Eq;
    for (const LabelOp& entry : kLabels) {
        if (std::strcmp(s, entry.label) == 0) return entry.op;
    }
    return Op::Eq;
}
// ----------------------------------------------------------------------------
// ColumnType
// ----------------------------------------------------------------------------
// Returns the lowercase name used for a ColumnType in TQL text.
// Values outside the enum fall back to "auto".
const char* column_type_name(ColumnType t) {
    const char* name = "auto";
    switch (t) {
        case ColumnType::Auto:   name = "auto";   break;
        case ColumnType::String: name = "string"; break;
        case ColumnType::Int:    name = "int";    break;
        case ColumnType::Float:  name = "float";  break;
        case ColumnType::Bool:   name = "bool";   break;
        case ColumnType::Date:   name = "date";   break;
        case ColumnType::Json:   name = "json";   break;
    }
    return name;
}
// Parses a column type name; anything not listed below (including "auto"
// itself) resolves to ColumnType::Auto.
ColumnType column_type_from_string(const std::string& s) {
    struct NamedType { const char* name; ColumnType type; };
    static const NamedType kNamedTypes[] = {
        {"string", ColumnType::String},
        {"int",    ColumnType::Int},
        {"float",  ColumnType::Float},
        {"bool",   ColumnType::Bool},
        {"date",   ColumnType::Date},
        {"json",   ColumnType::Json},
    };
    for (const NamedType& candidate : kNamedTypes) {
        if (s == candidate.name) return candidate.type;
    }
    return ColumnType::Auto;
}
// ----------------------------------------------------------------------------
// AggFn
// ----------------------------------------------------------------------------
// Maps an aggregation function to its TQL token (inverse of agg_fn_from_string).
// Values outside the enum fall back to "count".
const char* agg_fn_token(AggFn f) {
    const char* token = "count";
    switch (f) {
        case AggFn::Count:      token = "count";      break;
        case AggFn::Sum:        token = "sum";        break;
        case AggFn::Avg:        token = "avg";        break;
        case AggFn::Min:        token = "min";        break;
        case AggFn::Max:        token = "max";        break;
        case AggFn::Distinct:   token = "distinct";   break;
        case AggFn::Stddev:     token = "stddev";     break;
        case AggFn::Median:     token = "median";     break;
        case AggFn::P25:        token = "p25";        break;
        case AggFn::P75:        token = "p75";        break;
        case AggFn::P90:        token = "p90";        break;
        case AggFn::P99:        token = "p99";        break;
        case AggFn::Percentile: token = "percentile"; break;
    }
    return token;
}
// Parses an aggregation token; unrecognised input resolves to AggFn::Count.
AggFn agg_fn_from_string(const std::string& s) {
    struct NamedAgg { const char* token; AggFn fn; };
    static const NamedAgg kNamedAggs[] = {
        {"count",      AggFn::Count},
        {"sum",        AggFn::Sum},
        {"avg",        AggFn::Avg},
        {"min",        AggFn::Min},
        {"max",        AggFn::Max},
        {"distinct",   AggFn::Distinct},
        {"stddev",     AggFn::Stddev},
        {"median",     AggFn::Median},
        {"p25",        AggFn::P25},
        {"p75",        AggFn::P75},
        {"p90",        AggFn::P90},
        {"p99",        AggFn::P99},
        {"percentile", AggFn::Percentile},
    };
    for (const NamedAgg& candidate : kNamedAggs) {
        if (s == candidate.token) return candidate.fn;
    }
    return AggFn::Count;
}
// ----------------------------------------------------------------------------
// ViewMode
// ----------------------------------------------------------------------------
// One row of the ViewMode <-> token table.
struct ViewModeEntry {
    ViewMode m;         // enum value
    const char* token;  // string used in the TQL "display" field
};

// Lookup table shared by view_mode_token and view_mode_from_token.
static const ViewModeEntry kViewModes[] = {
    {ViewMode::Table,       "table"},
    {ViewMode::Bar,         "bar"},
    {ViewMode::Column,      "column"},
    {ViewMode::GroupedBar,  "grouped_bar"},
    {ViewMode::StackedBar,  "stacked_bar"},
    {ViewMode::Line,        "line"},
    {ViewMode::Area,        "area"},
    {ViewMode::Stairs,      "stairs"},
    {ViewMode::Scatter,     "scatter"},
    {ViewMode::Bubble,      "bubble"},
    {ViewMode::Histogram,   "histogram"},
    {ViewMode::Histogram2D, "hist2d"},
    {ViewMode::Heatmap,     "heatmap"},
    {ViewMode::BoxPlot,     "boxplot"},
    {ViewMode::Stem,        "stem"},
    {ViewMode::ErrorBars,   "errorbars"},
    {ViewMode::Pie,         "pie"},
    {ViewMode::Donut,       "donut"},
    {ViewMode::Funnel,      "funnel"},
    {ViewMode::Waterfall,   "waterfall"},
    {ViewMode::KPI,         "kpi"},
    {ViewMode::KPIGrid,     "kpi_grid"},
    {ViewMode::Candlestick, "candlestick"},
    {ViewMode::Radar,       "radar"},
};

// Number of rows in kViewModes.
static constexpr int kViewModesN =
    static_cast<int>(sizeof(kViewModes) / sizeof(*kViewModes));
// Returns the TQL "display" token for a ViewMode; modes missing from the
// table fall back to "table".
const char* view_mode_token(ViewMode m) {
    const ViewModeEntry* const last = kViewModes + kViewModesN;
    for (const ViewModeEntry* entry = kViewModes; entry != last; ++entry) {
        if (entry->m == m) return entry->token;
    }
    return "table";
}
// Parses a TQL "display" token; a null pointer or an unknown token yields
// ViewMode::Table.
ViewMode view_mode_from_token(const char* s) {
    if (s != nullptr) {
        int idx = 0;
        while (idx < kViewModesN) {
            const ViewModeEntry& entry = kViewModes[idx];
            if (std::strcmp(entry.token, s) == 0) return entry.m;
            ++idx;
        }
    }
    return ViewMode::Table;
}
// ----------------------------------------------------------------------------
// JoinStrategy
// ----------------------------------------------------------------------------
// Maps a JoinStrategy to its TQL token; values outside the enum fall back
// to "left".
const char* join_strategy_token(JoinStrategy s) {
    const char* token = "left";
    switch (s) {
        case JoinStrategy::Left:  token = "left";  break;
        case JoinStrategy::Inner: token = "inner"; break;
        case JoinStrategy::Right: token = "right"; break;
        case JoinStrategy::Full:  token = "full";  break;
    }
    return token;
}
// Parses a join strategy token. Null, "left" and any unrecognised token all
// resolve to JoinStrategy::Left.
JoinStrategy join_strategy_from_token(const char* s) {
    struct NamedJoin { const char* token; JoinStrategy strategy; };
    static const NamedJoin kNonDefault[] = {
        {"inner", JoinStrategy::Inner},
        {"right", JoinStrategy::Right},
        {"full",  JoinStrategy::Full},
    };
    if (s == nullptr) return JoinStrategy::Left;
    for (const NamedJoin& candidate : kNonDefault) {
        if (std::strcmp(s, candidate.token) == 0) return candidate.strategy;
    }
    return JoinStrategy::Left;
}
// ----------------------------------------------------------------------------
// aggregation_alias
// ----------------------------------------------------------------------------
// aggregation_alias — output column name for an aggregation.
//
// Resolution order:
//   1. a.alias, when the user supplied one;
//   2. "count" for AggFn::Count (the column is ignored);
//   3. "p<NN>_<col>" for AggFn::Percentile, where NN is a.arg * 100 rounded
//      to the nearest integer (for non-negative a.arg);
//   4. "<fn token>_<col>" for every other function.
//
// The name is assembled in a std::string, so column names of any length are
// kept intact. The previous fixed 128-byte snprintf buffer silently truncated
// long column names in the percentile case.
std::string aggregation_alias(const Aggregation& a) {
    if (!a.alias.empty()) return a.alias;
    if (a.fn == AggFn::Count) return "count";

    std::string out;
    if (a.fn == AggFn::Percentile) {
        const int pct = (int)(a.arg * 100.0 + 0.5);
        out = "p";
        out += std::to_string(pct);  // same decimal rendering as "%d"
    } else {
        out = agg_fn_token(a.fn);
    }
    out += '_';
    out += a.col;
    return out;
}
// ----------------------------------------------------------------------------
// DateGranularity
// ----------------------------------------------------------------------------
// Maps a DateGranularity to its breakout suffix token. None (and any value
// outside the enum) maps to the empty string.
const char* date_granularity_token(DateGranularity g) {
    const char* token = "";
    switch (g) {
        case DateGranularity::None:  token = "";      break;
        case DateGranularity::Year:  token = "year";  break;
        case DateGranularity::Month: token = "month"; break;
        case DateGranularity::Week:  token = "week";  break;
        case DateGranularity::Day:   token = "day";   break;
        case DateGranularity::Hour:  token = "hour";  break;
    }
    return token;
}
// Parses a granularity token. Null, empty and unrecognised tokens all resolve
// to DateGranularity::None.
DateGranularity date_granularity_from_token(const char* s) {
    struct NamedGranularity { const char* token; DateGranularity value; };
    static const NamedGranularity kGranularities[] = {
        {"year",  DateGranularity::Year},
        {"month", DateGranularity::Month},
        {"week",  DateGranularity::Week},
        {"day",   DateGranularity::Day},
        {"hour",  DateGranularity::Hour},
    };
    if (s == nullptr || *s == '\0') return DateGranularity::None;
    for (const NamedGranularity& candidate : kGranularities) {
        if (std::strcmp(s, candidate.token) == 0) return candidate.value;
    }
    return DateGranularity::None;
}
// Splits a breakout spec of the form "col:granularity".
//
// The split happens at the LAST ':' and only when the suffix is a recognised
// granularity token. In that case col_out receives the text before the colon
// and the granularity is returned. Otherwise col_out receives the whole
// input and DateGranularity::None is returned.
DateGranularity parse_breakout_granularity(const std::string& breakout,
                                           std::string& col_out) {
    const std::string::size_type colon = breakout.rfind(':');
    if (colon != std::string::npos) {
        const std::string tail = breakout.substr(colon + 1);
        const DateGranularity gran = date_granularity_from_token(tail.c_str());
        if (gran != DateGranularity::None) {
            col_out = breakout.substr(0, colon);
            return gran;
        }
    }
    col_out = breakout;
    return DateGranularity::None;
}
// ----------------------------------------------------------------------------
// Lua string literal escaping
// ----------------------------------------------------------------------------
// lua_string_literal — wraps `s` in double quotes and escapes it so the
// result is a valid Lua short string literal that evaluates back to `s`.
//
// Escapes: backslash, double quote, \n, \r, \t, and every other control
// byte (< 0x20) as a decimal escape. Bytes >= 0x20 (including UTF-8
// sequences) are copied through unchanged.
//
// The decimal escape is always written with three digits (\ddd). Lua reads
// up to three decimal digits after the backslash, so a shorter form such as
// "\1" followed by a literal digit '2' would be parsed as "\12" (byte 12)
// and corrupt the value on round-trip.
std::string lua_string_literal(const std::string& s) {
    std::string out;
    out.reserve(s.size() + 4);
    out += '"';
    for (char c : s) {
        switch (c) {
            case '\\': out += "\\\\"; break;
            case '"':  out += "\\\""; break;
            case '\n': out += "\\n";  break;
            case '\r': out += "\\r";  break;
            case '\t': out += "\\t";  break;
            default:
                if ((unsigned char)c < 0x20) {
                    char b[8];  // backslash + 3 digits + NUL fits comfortably
                    std::snprintf(b, sizeof(b), "\\%03d", (unsigned char)c);
                    out += b;
                } else {
                    out += c;
                }
        }
    }
    out += '"';
    return out;
}
// ----------------------------------------------------------------------------
// Color <-> hex
// ----------------------------------------------------------------------------
// Formats a packed colour as lowercase hex. The red channel is read from the
// lowest byte, then green, blue and alpha from successively higher bytes.
// Fully opaque colours (alpha == 0xFF) use "#rrggbb"; all others use
// "#rrggbbaa".
std::string color_to_hex(unsigned int c) {
    const unsigned int red   = c & 0xFF;
    const unsigned int green = (c >> 8) & 0xFF;
    const unsigned int blue  = (c >> 16) & 0xFF;
    const unsigned int alpha = (c >> 24) & 0xFF;

    char text[16];
    const bool opaque = (alpha == 0xFF);
    if (opaque) {
        std::snprintf(text, sizeof(text), "#%02x%02x%02x", red, green, blue);
    } else {
        std::snprintf(text, sizeof(text), "#%02x%02x%02x%02x", red, green, blue, alpha);
    }
    return std::string(text);
}
// Parses "#rrggbb" or "#rrggbbaa" into a packed colour (red in the lowest
// byte, alpha in the highest). Input that is shorter than 7 characters or
// does not start with '#' yields opaque white (0xFFFFFFFF). A channel whose
// two characters cannot be scanned as hex contributes 0. Alpha defaults to
// 0xFF unless the string has at least 9 characters.
unsigned int hex_to_color(const std::string& s) {
    const bool well_formed = s.size() >= 7 && s[0] == '#';
    if (!well_formed) return 0xFFFFFFFF;

    // Reads the two-character hex channel starting at `offset`.
    const auto channel_at = [&s](size_t offset) -> unsigned int {
        unsigned int value = 0;
        if (offset + 1 < s.size()) {
            std::sscanf(s.c_str() + offset, "%2x", &value);
        }
        return value;
    };

    const unsigned int red   = channel_at(1);
    const unsigned int green = channel_at(3);
    const unsigned int blue  = channel_at(5);
    unsigned int alpha = 0xFF;
    if (s.size() >= 9) alpha = channel_at(7);

    return red | (green << 8) | (blue << 16) | (alpha << 24);
}
} // namespace data_table
+51
View File
@@ -0,0 +1,51 @@
// tql_helpers — Pure token helpers for TQL (Table Query Language).
// Promoted from primitives_gallery playground to registry, issue 0081-C.
// No ImGui, no I/O, no Lua. Pure string<->enum conversions used by tql_emit
// and tql_apply.
#pragma once
#include "core/data_table_types.h"
#include <string>
namespace data_table {
// Op <-> token (TQL wire format).
const char* op_label(Op o);
Op op_from_label(const char* s);
// ColumnType <-> string.
const char* column_type_name(ColumnType t);
ColumnType column_type_from_string(const std::string& s);
// AggFn <-> token.
const char* agg_fn_token(AggFn f);
AggFn agg_fn_from_string(const std::string& s);
// ViewMode <-> token (Lua string used in TQL "display" field).
const char* view_mode_token(ViewMode m);
ViewMode view_mode_from_token(const char* s);
// JoinStrategy <-> token.
const char* join_strategy_token(JoinStrategy s);
JoinStrategy join_strategy_from_token(const char* s);
// aggregation_alias — pure: default alias when agg.alias is empty.
std::string aggregation_alias(const Aggregation& a);
// parse_breakout_granularity — splits "col:granularity" into col + gran.
// Returns DateGranularity::None if no suffix or unrecognised suffix.
DateGranularity parse_breakout_granularity(const std::string& breakout,
std::string& col_out);
// Lua string literal escaping used in TQL emit.
std::string lua_string_literal(const std::string& s);
// Color <-> hex string ("#rrggbb" / "#rrggbbaa").
std::string color_to_hex(unsigned int c);
unsigned int hex_to_color(const std::string& s);
// DateGranularity <-> token.
const char* date_granularity_token(DateGranularity g);
DateGranularity date_granularity_from_token(const char* s);
} // namespace data_table
+64
View File
@@ -0,0 +1,64 @@
---
name: tql_helpers
kind: function
lang: cpp
domain: core
version: "1.0.0"
purity: pure
signature: "namespace data_table { const char* op_label(Op); const char* view_mode_token(ViewMode); const char* agg_fn_token(AggFn); const char* join_strategy_token(JoinStrategy); std::string aggregation_alias(const Aggregation&); std::string lua_string_literal(const std::string&); std::string color_to_hex(unsigned int); unsigned int hex_to_color(const std::string&); ... }"
description: "Pure token conversion helpers for TQL (Table Query Language): Op/ColumnType/AggFn/ViewMode/JoinStrategy/DateGranularity enum-to-string and back, aggregation_alias, lua_string_literal escaping, and color hex encoding. Used internally by tql_emit and tql_apply."
tags: [tables, tql, cpp-tables]
uses_functions: []
uses_types:
- data_table_types_cpp_core
returns: []
returns_optional: false
error_type: ""
imports:
- "core/tql_helpers.h"
tested: true
tests:
- "lua_string_literal escaping"
- "color roundtrip"
test_file_path: "cpp/functions/core/tql_emit_test.cpp"
file_path: "cpp/functions/core/tql_helpers.cpp"
params:
- name: various
desc: "Token conversion functions operating on data_table enum values and std::string. All are pure — no side effects, no global state."
output: "String tokens or enum values corresponding to TQL wire format. Used by tql_emit (serialization) and tql_apply (parsing)."
---
## Ejemplo
```cpp
#include "core/tql_helpers.h"
// Op token
const char* t = data_table::op_label(data_table::Op::Gte); // ">="
// ViewMode token
const char* vm = data_table::view_mode_token(data_table::ViewMode::Bar); // "bar"
data_table::ViewMode m = data_table::view_mode_from_token("scatter"); // ViewMode::Scatter
// Aggregation alias
data_table::Aggregation a;
a.fn = data_table::AggFn::Sum; a.col = "revenue";
std::string alias = data_table::aggregation_alias(a); // "sum_revenue"
// Lua string escaping (used in emit)
std::string lit = data_table::lua_string_literal("say \"hi\""); // "\"say \\\"hi\\\"\""
// Color hex roundtrip
std::string hex = data_table::color_to_hex(0xFF0000FFu); // "#ff0000"
unsigned int c = data_table::hex_to_color("#ff0000"); // 0xFF0000FF
```
## Cuando usarla
Cuando necesites convertir entre valores internos del stack TQL (Op, ViewMode, AggFn, etc.) y sus representaciones textuales en el formato Lua TQL. Directamente usado por `tql_emit` y `tql_apply`; rara vez se llama desde código externo.
## Notas
- Todas las funciones son puras (no I/O, no estado global).
- `view_mode_from_token(nullptr)` y `op_from_label(nullptr)` retornan valores por defecto seguros (Table / Eq).
- Promoted from `cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp`, issue 0081-C.
+828
View File
@@ -0,0 +1,828 @@
// tql_to_sql.cpp — pure walker TQL -> SQL DuckDB + Lua subset transpiler.
// Promoted from primitives_gallery playground to registry, issue 0081.
// No DuckDB linked, no ImGui, no I/O.
#include "core/tql_to_sql.h"
#include "core/tql_helpers.h"
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <set>
#include <sstream>
#include <unordered_map>
namespace tql_to_sql {
using namespace data_table;
// ============================================================================
// Lua subset tokenizer + recursive-descent expression parser -> SQL string.
// ============================================================================
namespace {
// Tok — one lexical token of the Lua expression subset, as produced by Lexer.
struct Tok {
enum Kind {
// end-of-input marker, then the value-carrying tokens: number, string,
// identifier and [column] reference
EndT, NumT, StrT, IdentT, ColT,
// operators / keywords
Plus, Minus, Star, Slash, Percent, ConcatT,
Eq, Neq, Lt, Lte, Gt, Gte,
AndT, OrT, NotT,
IfT, ThenT, ElseT, EndKW,
LParen, RParen, Comma, Dot,
TrueT, FalseT, NilT,
} kind = EndT;
std::string text; // raw token text (set for idents/numbers/strings/col refs; left empty for operators and keywords)
};
// Forbidden categories: literal token -> message reported when the lexer
// meets that identifier.
const std::unordered_map<std::string, const char*>& forbidden_keywords() {
    using ReasonByToken = std::unordered_map<std::string, const char*>;
    static const ReasonByToken kReasons{
        // statements and control flow
        {"function",     "closures not allowed in SQL transpile subset"},
        {"local",        "local declarations not allowed"},
        {"for",          "loops not allowed"},
        {"while",        "loops not allowed"},
        {"repeat",       "loops not allowed"},
        {"do",           "block statements not allowed"},
        {"return",       "explicit return not allowed (formula is implicit expression)"},
        {"goto",         "goto not allowed"},
        {"break",        "break not allowed (no loops)"},
        // io/os/debug/coroutines
        {"io",           "io.* access not allowed"},
        {"os",           "os.* access not allowed"},
        {"debug",        "debug.* access not allowed"},
        {"package",      "package access not allowed"},
        {"require",      "require not allowed"},
        {"coroutine",    "coroutines not allowed"},
        {"setmetatable", "metatables not allowed"},
        {"getmetatable", "metatables not allowed"},
        {"rawget",       "rawget not allowed"},
        {"rawset",       "rawset not allowed"},
        {"pcall",        "pcall not allowed"},
        {"xpcall",       "xpcall not allowed"},
        {"print",        "print not allowed (SQL has no side effects)"},
    };
    return kReasons;
}
// Whitelist of SQL-transpilable functions: Lua name -> SQL function template.
// The template uses $1, $2, ... as argument placeholders.
struct FnMap {
    int min_args;          // fewest arguments accepted
    int max_args;          // most arguments accepted
    const char* sql_tmpl;  // SQL template with $N placeholders
};

const std::unordered_map<std::string, FnMap>& fn_whitelist() {
    using Whitelist = std::unordered_map<std::string, FnMap>;
    static const Whitelist kFunctions{
        // math.*
        {"math.floor",   FnMap{1, 1, "floor($1)"}},
        {"math.ceil",    FnMap{1, 1, "ceiling($1)"}},
        {"math.abs",     FnMap{1, 1, "abs($1)"}},
        {"math.sqrt",    FnMap{1, 1, "sqrt($1)"}},
        {"math.sin",     FnMap{1, 1, "sin($1)"}},
        {"math.cos",     FnMap{1, 1, "cos($1)"}},
        {"math.log",     FnMap{1, 1, "ln($1)"}},
        {"math.exp",     FnMap{1, 1, "exp($1)"}},
        {"math.min",     FnMap{2, 2, "least($1, $2)"}},
        {"math.max",     FnMap{2, 2, "greatest($1, $2)"}},
        // string.*
        {"string.upper", FnMap{1, 1, "upper($1)"}},
        {"string.lower", FnMap{1, 1, "lower($1)"}},
        {"string.len",   FnMap{1, 1, "length($1)"}},
        // special handling: 2 vs 3 arguments
        {"string.sub",   FnMap{2, 3, "/*SUBSTRING*/"}},
        // top-level
        {"tostring",     FnMap{1, 1, "CAST($1 AS VARCHAR)"}},
        {"tonumber",     FnMap{1, 1, "CAST($1 AS DOUBLE)"}},
    };
    return kFunctions;
}
// SQL-safe identifier: always wrapped in double quotes, which preserves case
// and allows ':' (the granularity suffix). Embedded double quotes are doubled.
std::string sql_ident(const std::string& name) {
    std::string quoted;
    quoted.reserve(name.size() + 4);
    quoted.push_back('"');
    for (std::string::size_type k = 0; k < name.size(); ++k) {
        const char ch = name[k];
        quoted.push_back(ch);
        if (ch == '"') quoted.push_back('"');  // "" is the escaped form of "
    }
    quoted.push_back('"');
    return quoted;
}
// SQL string literal: wraps `s` in single quotes and doubles every embedded
// single quote. No other character is altered.
std::string sql_string_literal(const std::string& s) {
    std::string literal;
    literal.reserve(s.size() + 4);
    literal.push_back('\'');
    std::string::size_type from = 0;
    for (;;) {
        const std::string::size_type hit = s.find('\'', from);
        if (hit == std::string::npos) {
            literal.append(s, from, std::string::npos);  // remaining tail
            break;
        }
        literal.append(s, from, hit - from);  // text before the quote
        literal += "''";
        from = hit + 1;
    }
    literal.push_back('\'');
    return literal;
}
// Lexer — tokenizer for the Lua expression subset accepted by the SQL
// transpiler.
//
// Recognised input: whitespace, `--` line comments, `[col]` column
// references, single- or double-quoted strings, decimal numbers (digits with
// at most one '.'; no exponent or hex forms), identifiers/keywords, and the
// operators listed in Tok::Kind. Forbidden keywords, assignment, table
// literals, '#', ':', ';' and '...' are rejected with a message available
// through error().
class Lexer {
public:
    // Keeps its own copy of `src`, so the lexer stays valid even when it is
    // constructed from a temporary string.
    Lexer(const std::string& src) : src_(src) {}

    // Tokenizes the whole source, appending to `out`.
    // Returns true on success; the last token appended is then EndT.
    // Returns false on the first lexical error; tokens produced before the
    // error remain in `out` and error() describes the problem.
    bool tokenize(std::vector<Tok>& out) {
        size_t i = 0;
        while (i < src_.size()) {
            char c = src_[i];
            if (std::isspace((unsigned char)c)) { ++i; continue; }
            // Lua line comment: skip up to (not including) the newline.
            if (c == '-' && i + 1 < src_.size() && src_[i+1] == '-') {
                while (i < src_.size() && src_[i] != '\n') ++i;
                continue;
            }
            if (c == '[') {
                // Column reference: everything up to the closing ']' is the name.
                size_t j = i + 1;
                std::string name;
                while (j < src_.size() && src_[j] != ']') {
                    name += src_[j];
                    ++j;
                }
                if (j >= src_.size()) { error_ = "unterminated [col] ref"; return false; }
                Tok t; t.kind = Tok::ColT; t.text = name;
                out.push_back(t);
                i = j + 1;
                continue;
            }
            if (c == '"' || c == '\'') {
                // String literal; Tok::text receives the decoded contents.
                char q = c;
                ++i;
                std::string s;
                while (i < src_.size() && src_[i] != q) {
                    if (src_[i] == '\\' && i + 1 < src_.size()) {
                        char esc = src_[i+1];
                        i += 2;
                        switch (esc) {
                            case 'n':  s += '\n'; break;
                            case 't':  s += '\t'; break;
                            case 'r':  s += '\r'; break;
                            case 'a':  s += '\a'; break;
                            case 'b':  s += '\b'; break;
                            case 'f':  s += '\f'; break;
                            case 'v':  s += '\v'; break;
                            case '\\':
                            case '\'':
                            case '"':  s += esc;  break;
                            default:
                                if (std::isdigit((unsigned char)esc)) {
                                    // Decimal escape \ddd: one to three digits,
                                    // value must fit in a byte (Lua rule).
                                    int code = esc - '0';
                                    for (int k = 0;
                                         k < 2 && i < src_.size() &&
                                         std::isdigit((unsigned char)src_[i]);
                                         ++k) {
                                        code = code * 10 + (src_[i] - '0');
                                        ++i;
                                    }
                                    if (code > 255) {
                                        error_ = "decimal escape too large";
                                        return false;
                                    }
                                    s += (char)code;
                                } else {
                                    // Unknown escape: keep the character as-is
                                    // (lenient, matches previous behaviour).
                                    s += esc;
                                }
                        }
                    } else {
                        s += src_[i++];
                    }
                }
                if (i >= src_.size()) { error_ = "unterminated string literal"; return false; }
                ++i;  // consume the closing quote
                Tok t; t.kind = Tok::StrT; t.text = s;
                out.push_back(t);
                continue;
            }
            if (std::isdigit((unsigned char)c) ||
                (c == '.' && i + 1 < src_.size() && std::isdigit((unsigned char)src_[i+1]))) {
                // Number: digits with at most one decimal point.
                std::string n;
                bool seen_dot = false;
                while (i < src_.size()) {
                    char d = src_[i];
                    if (std::isdigit((unsigned char)d)) { n += d; ++i; }
                    else if (d == '.' && !seen_dot) { n += d; seen_dot = true; ++i; }
                    else break;
                }
                Tok t; t.kind = Tok::NumT; t.text = n;
                out.push_back(t);
                continue;
            }
            if (std::isalpha((unsigned char)c) || c == '_') {
                std::string id;
                while (i < src_.size() &&
                       (std::isalnum((unsigned char)src_[i]) || src_[i] == '_')) {
                    id += src_[i++];
                }
                // Reject identifiers outside the transpilable subset right away.
                auto& F = forbidden_keywords();
                auto fit = F.find(id);
                if (fit != F.end()) {
                    error_ = std::string("token '") + id + "': " + fit->second;
                    return false;
                }
                Tok t;
                if      (id == "and")   t.kind = Tok::AndT;
                else if (id == "or")    t.kind = Tok::OrT;
                else if (id == "not")   t.kind = Tok::NotT;
                else if (id == "if")    t.kind = Tok::IfT;
                else if (id == "then")  t.kind = Tok::ThenT;
                else if (id == "else")  t.kind = Tok::ElseT;
                else if (id == "end")   t.kind = Tok::EndKW;
                else if (id == "true")  t.kind = Tok::TrueT;
                else if (id == "false") t.kind = Tok::FalseT;
                else if (id == "nil")   t.kind = Tok::NilT;
                else { t.kind = Tok::IdentT; t.text = id; }
                out.push_back(t);
                continue;
            }
            // Operators: push a text-less token and advance by `len` characters.
            auto emit = [&](Tok::Kind k, int len) {
                Tok t; t.kind = k; out.push_back(t); i += (size_t)len;
            };
            if (c == '+') { emit(Tok::Plus,   1); continue; }
            if (c == '-') { emit(Tok::Minus,  1); continue; }
            if (c == '*') { emit(Tok::Star,   1); continue; }
            if (c == '/') { emit(Tok::Slash,  1); continue; }
            if (c == '%') { emit(Tok::Percent,1); continue; }
            if (c == '(') { emit(Tok::LParen, 1); continue; }
            if (c == ')') { emit(Tok::RParen, 1); continue; }
            if (c == ',') { emit(Tok::Comma,  1); continue; }
            if (c == '.') {
                if (i + 1 < src_.size() && src_[i+1] == '.') {
                    if (i + 2 < src_.size() && src_[i+2] == '.') {
                        error_ = "'...' vararg not allowed"; return false;
                    }
                    emit(Tok::ConcatT, 2); continue;
                }
                emit(Tok::Dot, 1); continue;
            }
            if (c == '=') {
                if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Eq, 2); continue; }
                error_ = "single '=' (assignment) not allowed"; return false;
            }
            if (c == '~') {
                if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Neq, 2); continue; }
                error_ = "stray '~'"; return false;
            }
            if (c == '<') {
                if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Lte, 2); continue; }
                emit(Tok::Lt, 1); continue;
            }
            if (c == '>') {
                if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Gte, 2); continue; }
                emit(Tok::Gt, 1); continue;
            }
            if (c == '{') { error_ = "table literals '{...}' not allowed"; return false; }
            if (c == '}') { error_ = "stray '}'"; return false; }
            if (c == ';') { error_ = "multi-statement not allowed"; return false; }
            if (c == '#') { error_ = "length '#' operator not allowed"; return false; }
            if (c == ':') { error_ = "method calls ':' not allowed"; return false; }
            error_ = std::string("unexpected character '") + c + "'";
            return false;
        }
        Tok t; t.kind = Tok::EndT;
        out.push_back(t);
        return true;
    }

    // Message for the last failed tokenize(); empty if no error was recorded.
    const std::string& error() const { return error_; }

private:
    std::string src_;    // owned copy of the source text
    std::string error_;  // set by tokenize() on failure
};
// Recursive-descent parser that converts the token stream of one formula
// into a SQL expression string.
//
// Precedence, lowest to highest, as encoded by the call chain:
//   if/then/else/end -> or -> and -> not -> comparison -> concat ('..')
//   -> additive (+ -) -> multiplicative (* / %) -> unary minus -> primary.
// Every binary node is emitted fully parenthesised, so the generated SQL does
// not depend on SQL operator precedence.
//
// Each parse_* method returns true and writes the SQL fragment to `out` on
// success; on failure it returns false and stores a message readable via
// error(). The parser keeps references to `toks` and `headers`; both must
// outlive the Parser.
class Parser {
public:
    Parser(const std::vector<Tok>& toks,
           const std::vector<std::string>& headers)
        : toks_(toks), headers_(headers) {}

    // Entry point: parses one full expression.
    bool parse_expr(std::string& out) {
        return parse_ternary(out);
    }

    // `if c then a else b end` -> `CASE WHEN c THEN a ELSE b END`.
    // The else branch is mandatory.
    bool parse_ternary(std::string& out) {
        if (peek(0).kind == Tok::IfT) {
            ++pos_;
            std::string a, b, c;
            if (!parse_logic_or(a)) return false;
            if (!eat(Tok::ThenT, "'then' expected after 'if'")) return false;
            if (!parse_ternary(b)) return false;
            if (!eat(Tok::ElseT, "'else' expected (subset requires else branch)")) return false;
            if (!parse_ternary(c)) return false;
            if (!eat(Tok::EndKW, "'end' expected to close 'if'")) return false;
            out = "CASE WHEN " + a + " THEN " + b + " ELSE " + c + " END";
            return true;
        }
        return parse_logic_or(out);
    }

    // Left-associative chain of `or`.
    bool parse_logic_or(std::string& out) {
        if (!parse_logic_and(out)) return false;
        while (peek(0).kind == Tok::OrT) {
            ++pos_;
            std::string rhs;
            if (!parse_logic_and(rhs)) return false;
            out = "(" + out + " OR " + rhs + ")";
        }
        return true;
    }

    // Left-associative chain of `and`.
    bool parse_logic_and(std::string& out) {
        if (!parse_not(out)) return false;
        while (peek(0).kind == Tok::AndT) {
            ++pos_;
            std::string rhs;
            if (!parse_not(rhs)) return false;
            out = "(" + out + " AND " + rhs + ")";
        }
        return true;
    }

    // Prefix `not`; it applies to the whole comparison that follows.
    bool parse_not(std::string& out) {
        if (peek(0).kind == Tok::NotT) {
            ++pos_;
            std::string e;
            if (!parse_not(e)) return false;
            out = "NOT (" + e + ")";
            return true;
        }
        return parse_comparison(out);
    }

    // ==, ~=, <, <=, >, >= mapped to their SQL spellings (left-associative).
    bool parse_comparison(std::string& out) {
        if (!parse_concat(out)) return false;
        while (true) {
            Tok::Kind k = peek(0).kind;
            const char* op = nullptr;
            if      (k == Tok::Eq)  op = " = ";
            else if (k == Tok::Neq) op = " <> ";
            else if (k == Tok::Lt)  op = " < ";
            else if (k == Tok::Lte) op = " <= ";
            else if (k == Tok::Gt)  op = " > ";
            else if (k == Tok::Gte) op = " >= ";
            else break;
            ++pos_;
            std::string rhs;
            if (!parse_concat(rhs)) return false;
            out = "(" + out + op + rhs + ")";
        }
        return true;
    }

    // `..` -> SQL `||`.
    bool parse_concat(std::string& out) {
        if (!parse_additive(out)) return false;
        while (peek(0).kind == Tok::ConcatT) {
            ++pos_;
            std::string rhs;
            if (!parse_additive(rhs)) return false;
            out = "(" + out + " || " + rhs + ")";
        }
        return true;
    }

    // Binary + and -.
    bool parse_additive(std::string& out) {
        if (!parse_multiplicative(out)) return false;
        while (peek(0).kind == Tok::Plus || peek(0).kind == Tok::Minus) {
            const char* op = (peek(0).kind == Tok::Plus) ? " + " : " - ";
            ++pos_;
            std::string rhs;
            if (!parse_multiplicative(rhs)) return false;
            out = "(" + out + op + rhs + ")";
        }
        return true;
    }

    // Binary *, / and %.
    bool parse_multiplicative(std::string& out) {
        if (!parse_unary(out)) return false;
        while (peek(0).kind == Tok::Star || peek(0).kind == Tok::Slash ||
               peek(0).kind == Tok::Percent) {
            const char* op = (peek(0).kind == Tok::Star)  ? " * "
                           : (peek(0).kind == Tok::Slash) ? " / " : " % ";
            ++pos_;
            std::string rhs;
            if (!parse_unary(rhs)) return false;
            out = "(" + out + op + rhs + ")";
        }
        return true;
    }

    // Prefix minus (may be stacked).
    bool parse_unary(std::string& out) {
        if (peek(0).kind == Tok::Minus) {
            ++pos_;
            std::string e;
            if (!parse_unary(e)) return false;
            out = "(-" + e + ")";
            return true;
        }
        return parse_primary(out);
    }

    // Literals, [col] references, parenthesised expressions and whitelisted
    // function calls (optionally namespaced as `ns.name`).
    bool parse_primary(std::string& out) {
        // Reference into toks_ (or the sentinel); stays valid across ++pos_.
        const Tok& t = peek(0);
        if (t.kind == Tok::NumT) {
            ++pos_;
            out = t.text;
            return true;
        }
        if (t.kind == Tok::StrT) {
            ++pos_;
            out = sql_string_literal(t.text);
            return true;
        }
        if (t.kind == Tok::TrueT)  { ++pos_; out = "TRUE";  return true; }
        if (t.kind == Tok::FalseT) { ++pos_; out = "FALSE"; return true; }
        if (t.kind == Tok::NilT)   { ++pos_; out = "NULL";  return true; }
        if (t.kind == Tok::ColT) {
            ++pos_;
            // Column references are quoted as-is; headers_ is not consulted.
            (void)headers_;
            out = sql_ident(t.text);
            return true;
        }
        if (t.kind == Tok::LParen) {
            ++pos_;
            std::string e;
            if (!parse_expr(e)) return false;
            if (!eat(Tok::RParen, "expected ')'")) return false;
            out = "(" + e + ")";
            return true;
        }
        if (t.kind == Tok::IdentT) {
            std::string name = t.text;
            ++pos_;
            if (peek(0).kind == Tok::Dot) {
                ++pos_;
                if (peek(0).kind != Tok::IdentT) {
                    error_ = "expected identifier after '.'";
                    return false;
                }
                name += "." + peek(0).text;
                ++pos_;
            }
            // An identifier is only legal as the head of a function call.
            if (peek(0).kind != Tok::LParen) {
                error_ = "bare identifier '" + name +
                         "' not allowed (only [col] refs + whitelisted fn calls)";
                return false;
            }
            ++pos_;
            std::vector<std::string> args;
            if (peek(0).kind != Tok::RParen) {
                while (true) {
                    std::string a;
                    if (!parse_expr(a)) return false;
                    args.push_back(a);
                    if (peek(0).kind == Tok::Comma) { ++pos_; continue; }
                    break;
                }
            }
            if (!eat(Tok::RParen, "expected ')' closing function args")) return false;
            auto& W = fn_whitelist();
            auto wit = W.find(name);
            if (wit == W.end()) {
                error_ = "function '" + name +
                         "' not in SQL transpile whitelist (math.*, string.upper/lower/len/sub, tostring, tonumber)";
                return false;
            }
            const FnMap& fm = wit->second;
            if ((int)args.size() < fm.min_args || (int)args.size() > fm.max_args) {
                std::ostringstream os;
                os << "function '" << name << "' takes " << fm.min_args;
                if (fm.max_args != fm.min_args) os << ".." << fm.max_args;
                os << " args, got " << args.size();
                error_ = os.str();
                return false;
            }
            if (name == "string.sub") {
                // (s, i)    -> substring(s, i)
                // (s, i, j) -> substring(s, i, j - i + 1): the third SQL
                // argument is a length, so the end index is converted.
                if (args.size() == 2) {
                    out = "substring(" + args[0] + ", " + args[1] + ")";
                } else {
                    out = "substring(" + args[0] + ", " + args[1] +
                          ", (" + args[2] + ") - (" + args[1] + ") + 1)";
                }
                return true;
            }
            // Generic: substitute $1..$N in the whitelist template.
            const std::string tmpl = fm.sql_tmpl;
            out = substitute_args(tmpl, args);
            return true;
        }
        error_ = std::string("unexpected token in expression");
        return false;
    }

    // Consumes the current token if it has kind `k`; otherwise records `msg`.
    bool eat(Tok::Kind k, const char* msg) {
        if (peek(0).kind != k) { error_ = msg; return false; }
        ++pos_;
        return true;
    }

    // Token at pos_ + off. Past the end it keeps returning the last token;
    // with an empty token vector it returns a static EndT sentinel instead of
    // calling back() on an empty vector.
    const Tok& peek(int off) const {
        const size_t i = pos_ + (size_t)off;
        if (i < toks_.size()) return toks_[i];
        if (!toks_.empty()) return toks_.back();
        static const Tok kEnd = [] { Tok t; t.kind = Tok::EndT; return t; }();
        return kEnd;
    }

    bool at_end() const { return peek(0).kind == Tok::EndT; }
    const std::string& error() const { return error_; }

private:
    // Expands `$N` placeholders (1-based) of `tmpl` with `args` in a single
    // left-to-right pass. Inserted argument text is never rescanned, so an
    // argument that itself contains "$2" (a quoted column name or a string
    // literal) is left intact. For each '$' the longest digit run that names
    // an existing argument wins; anything else is copied through verbatim.
    static std::string substitute_args(const std::string& tmpl,
                                       const std::vector<std::string>& args) {
        std::string result;
        result.reserve(tmpl.size());
        size_t i = 0;
        while (i < tmpl.size()) {
            if (tmpl[i] == '$') {
                size_t best_idx = 0;
                size_t best_end = i;
                size_t idx = 0;
                for (size_t j = i + 1; j < tmpl.size(); ++j) {
                    const char ch = tmpl[j];
                    if (ch < '0' || ch > '9') break;
                    idx = idx * 10 + (size_t)(ch - '0');
                    if (idx == 0 || idx > args.size()) break;  // "$0" / unknown arg
                    best_idx = idx;
                    best_end = j + 1;
                }
                if (best_idx != 0) {
                    result += args[best_idx - 1];
                    i = best_end;
                    continue;
                }
            }
            result += tmpl[i];
            ++i;
        }
        return result;
    }

    const std::vector<Tok>& toks_;
    const std::vector<std::string>& headers_;
    size_t pos_ = 0;
    std::string error_;
};
} // anon
std::string transpile_expr(const std::string& formula,
const std::vector<std::string>& in_headers,
std::string& error_out) {
error_out.clear();
std::vector<Tok> toks;
Lexer lex(formula);
if (!lex.tokenize(toks)) {
error_out = lex.error();
return "";
}
Parser p(toks, in_headers);
std::string out;
if (!p.parse_expr(out)) {
error_out = p.error();
return "";
}
if (!p.at_end()) {
error_out = "unexpected trailing tokens after expression";
return "";
}
return out;
}
// True when `formula` transpiles cleanly: transpile_expr reported no error
// and produced a non-empty SQL string. The failure reason, if any, is left in
// `error_out`. No headers are supplied to the transpiler.
bool is_transpilable(const std::string& formula, std::string& error_out) {
    std::vector<std::string> no_headers;
    const std::string sql = transpile_expr(formula, no_headers, error_out);
    if (!error_out.empty()) {
        return false;
    }
    return !sql.empty();
}
// ============================================================================
// TQL State -> SQL DuckDB emitter.
// ============================================================================
namespace {
// resolve_main_idx: static helper — finds index of main_source in tables.
// Returns -1 if tables empty, 0 if main_source empty or not found.
static int resolve_main_idx(const std::vector<TableInput>& tables,
const std::string& main_source) {
if (tables.empty()) return -1;
if (main_source.empty()) return 0;
for (size_t i = 0; i < tables.size(); ++i) {
if (tables[i].name == main_source) return (int)i;
}
return 0;
}
// SQL aggregate expression for one Aggregation.
// Count ignores the column (COUNT(*)); every other function is applied to
// the quoted column. Fixed percentiles and Percentile (which uses a.arg as
// the quantile argument, formatted with %g) map to quantile_cont.
std::string emit_agg_expr(const Aggregation& a) {
    // `head` already carries the opening parenthesis, e.g. "SUM(".
    const auto wrap = [&a](const char* head) {
        return std::string(head) + sql_ident(a.col) + ")";
    };
    const auto quantile = [&a](const char* q) {
        return std::string("quantile_cont(") + sql_ident(a.col) + ", " + q + ")";
    };

    switch (a.fn) {
    case AggFn::Count:    return "COUNT(*)";
    case AggFn::Sum:      return wrap("SUM(");
    case AggFn::Avg:      return wrap("AVG(");
    case AggFn::Min:      return wrap("MIN(");
    case AggFn::Max:      return wrap("MAX(");
    case AggFn::Distinct: return wrap("COUNT(DISTINCT ");
    case AggFn::Stddev:   return wrap("STDDEV(");
    case AggFn::Median:   return quantile("0.5");
    case AggFn::P25:      return quantile("0.25");
    case AggFn::P75:      return quantile("0.75");
    case AggFn::P90:      return quantile("0.90");
    case AggFn::P99:      return quantile("0.99");
    case AggFn::Percentile: {
        char formatted[32];
        std::snprintf(formatted, sizeof(formatted), "%g", a.arg);
        return quantile(formatted);
    }
    }
    // Reached only for an AggFn value outside the enumerators above.
    return "/* unknown agg */ NULL";
}
std::string emit_breakout_expr(const std::string& bk) {
std::string col_clean;
DateGranularity g = parse_breakout_granularity(bk, col_clean);
if (g == DateGranularity::None) {
return sql_ident(col_clean);
}
const char* tok = date_granularity_token(g);
return std::string("date_trunc('") + tok + "', " + sql_ident(col_clean) + ")";
}
// SQL operator text (space-padded) for a filter Op. The three substring
// operators all map to LIKE; they differ only in the pattern built by
// emit_filter_rhs. An Op value outside the enumerators falls back to " = ".
const char* sql_op(Op op) {
    switch (op) {
    case Op::Contains:
    case Op::StartsWith:
    case Op::EndsWith:
        return " LIKE ";
    case Op::NotContains:
        return " NOT LIKE ";
    case Op::Eq:
        return " = ";
    case Op::Neq:
        return " <> ";
    case Op::Gt:
        return " > ";
    case Op::Gte:
        return " >= ";
    case Op::Lt:
        return " < ";
    case Op::Lte:
        return " <= ";
    }
    return " = ";
}
// Builds the right-hand side of one filter predicate and appends its bound
// value to `params`. The returned text is appended directly after sql_op().
//
// Comparison operators bind f.value unchanged and return "?".
// Substring operators (Contains / NotContains / StartsWith / EndsWith) bind a
// LIKE pattern. LIKE metacharacters inside the user value ('%' and '_') and
// the escape character itself are backslash-escaped, and the returned text
// carries an ESCAPE clause, so that e.g. Contains "50%" or "a_b" matches only
// that literal substring instead of treating '%'/'_' as wildcards.
std::string emit_filter_rhs(const Filter& f, std::vector<std::string>& params) {
    const bool leading_wild  = f.op == Op::Contains || f.op == Op::NotContains ||
                               f.op == Op::EndsWith;
    const bool trailing_wild = f.op == Op::Contains || f.op == Op::NotContains ||
                               f.op == Op::StartsWith;
    if (!leading_wild && !trailing_wild) {
        params.push_back(f.value);
        return "?";
    }

    std::string pattern;
    pattern.reserve(f.value.size() + 2);
    if (leading_wild) pattern += '%';
    for (const char ch : f.value) {
        // Neutralise characters that LIKE would otherwise interpret.
        if (ch == '\\' || ch == '%' || ch == '_') pattern += '\\';
        pattern += ch;
    }
    if (trailing_wild) pattern += '%';
    params.push_back(pattern);
    // The SQL literal '\' is a single backslash.
    return "? ESCAPE '\\'";
}
bool emit_stage0(const State& st, const std::vector<TableInput>& tables,
int main_idx, SqlEmit& e) {
if (main_idx < 0 || main_idx >= (int)tables.size()) {
e.error = "main table out of range";
return false;
}
const TableInput& main_t = tables[(size_t)main_idx];
std::string select_list;
for (size_t i = 0; i < main_t.headers.size(); ++i) {
if (i > 0) select_list += ", ";
select_list += sql_ident(main_t.headers[i]);
}
// Derived cols (stage 0 derived).
if (!st.stages.empty()) {
const Stage& s0 = st.stages[0];
for (const auto& d : s0.derived) {
if (d.source_col >= 0 && d.formula.empty()) {
if (d.source_col < (int)main_t.headers.size()) {
select_list += ", " + sql_ident(main_t.headers[(size_t)d.source_col])
+ " AS " + sql_ident(d.name);
}
continue;
}
std::string err;
std::string expr = transpile_expr(d.formula, main_t.headers, err);
if (!err.empty()) {
std::string msg = "derived col '" + d.name +
"' formula out of SQL subset: " + err;
e.warnings.push_back(msg);
continue;
}
select_list += ", " + expr + " AS " + sql_ident(d.name);
}
}
std::string from = sql_ident(main_t.name);
// Joins
for (const auto& jn : st.joins) {
const TableInput* right = nullptr;
for (const auto& ti : tables) {
if (ti.name == jn.source) { right = &ti; break; }
}
if (!right) {
e.warnings.push_back("join source '" + jn.source + "' not in tables");
continue;
}
const char* strat = "LEFT JOIN";
switch (jn.strategy) {
case JoinStrategy::Left: strat = "LEFT JOIN"; break;
case JoinStrategy::Inner: strat = "INNER JOIN"; break;
case JoinStrategy::Right: strat = "RIGHT JOIN"; break;
case JoinStrategy::Full: strat = "FULL OUTER JOIN"; break;
}
from += "\n " + std::string(strat) + " " + sql_ident(right->name)
+ " AS " + sql_ident(jn.alias) + " ON ";
for (size_t k = 0; k < jn.on.size(); ++k) {
if (k > 0) from += " AND ";
from += sql_ident(main_t.name) + "." + sql_ident(jn.on[k].first)
+ " = " + sql_ident(jn.alias) + "." + sql_ident(jn.on[k].second);
}
if (jn.fields.empty()) {
for (const auto& rh : right->headers) {
std::string aliased = jn.alias + "." + rh;
select_list += ", " + sql_ident(jn.alias) + "." + sql_ident(rh)
+ " AS " + sql_ident(aliased);
}
} else {
for (const auto& fld : jn.fields) {
std::string aliased = jn.alias + "." + fld;
select_list += ", " + sql_ident(jn.alias) + "." + sql_ident(fld)
+ " AS " + sql_ident(aliased);
}
}
}
// Stage 0 WHERE: filters.
std::vector<std::string> eff_headers = main_t.headers;
if (!st.stages.empty()) {
for (const auto& d : st.stages[0].derived) {
eff_headers.push_back(d.name);
}
}
std::string where_clause;
if (!st.stages.empty()) {
const Stage& s0 = st.stages[0];
for (size_t fi = 0; fi < s0.filters.size(); ++fi) {
const Filter& f = s0.filters[fi];
if (f.col < 0 || f.col >= (int)eff_headers.size()) {
e.warnings.push_back("stage0 filter col idx out of range");
continue;
}
std::string col = sql_ident(eff_headers[(size_t)f.col]);
if (!where_clause.empty()) where_clause += " AND ";
where_clause += col + sql_op(f.op) + emit_filter_rhs(f, e.params);
}
}
// Stage 0 ORDER BY
std::string order_clause;
if (!st.stages.empty()) {
const Stage& s0 = st.stages[0];
for (size_t si = 0; si < s0.sorts.size(); ++si) {
const SortClause& sc = s0.sorts[si];
if (!order_clause.empty()) order_clause += ", ";
order_clause += sql_ident(sc.col) + (sc.desc ? " DESC" : " ASC");
}
}
std::string cte = "t0 AS (\n SELECT " + select_list + "\n FROM " + from;
if (!where_clause.empty()) cte += "\n WHERE " + where_clause;
if (!order_clause.empty()) cte += "\n ORDER BY " + order_clause;
cte += "\n)";
e.sql = "WITH " + cte;
return true;
}
bool emit_stage_n(const Stage& stg, int n, SqlEmit& e) {
std::string prev = "t" + std::to_string(n - 1);
std::string cur = "t" + std::to_string(n);
std::string select_list;
for (size_t i = 0; i < stg.breakouts.size(); ++i) {
if (i > 0) select_list += ", ";
select_list += emit_breakout_expr(stg.breakouts[i])
+ " AS " + sql_ident(stg.breakouts[i]);
}
for (size_t i = 0; i < stg.aggregations.size(); ++i) {
if (!select_list.empty()) select_list += ", ";
std::string alias = aggregation_alias(stg.aggregations[i]);
select_list += emit_agg_expr(stg.aggregations[i]) + " AS " + sql_ident(alias);
}
if (select_list.empty()) select_list = "*";
// GROUP BY
std::string group_clause;
for (size_t i = 0; i < stg.breakouts.size(); ++i) {
if (i > 0) group_clause += ", ";
group_clause += emit_breakout_expr(stg.breakouts[i]);
}
// ORDER BY
std::string order_clause;
for (size_t i = 0; i < stg.sorts.size(); ++i) {
if (i > 0) order_clause += ", ";
order_clause += sql_ident(stg.sorts[i].col) + (stg.sorts[i].desc ? " DESC" : " ASC");
}
std::string cte = ",\n" + cur + " AS (\n SELECT " + select_list
+ "\n FROM " + prev;
if (!group_clause.empty()) cte += "\n GROUP BY " + group_clause;
if (!order_clause.empty()) cte += "\n ORDER BY " + order_clause;
cte += "\n)";
e.sql += cte;
return true;
}
} // anon
// Public entry point: emits the CTE chain t0..t<target> followed by
// "SELECT * FROM t<target>;".
// `up_to_stage < 0` selects state.active_stage; the target is then clamped to
// [0, stages.size() - 1]. Fails with `error` set when the state has no stages
// or no tables are given. When main_source matches no table, tables[0] is
// used as the main table.
SqlEmit emit_sql(const State& state,
                 const std::vector<TableInput>& tables,
                 int up_to_stage) {
    SqlEmit result;
    if (state.stages.empty()) {
        result.error = "state has no stages";
        return result;
    }
    if (tables.empty()) {
        result.error = "no input tables provided";
        return result;
    }

    // Pick the last stage to emit and clamp it into range.
    const int last_stage = (int)state.stages.size() - 1;
    int target = up_to_stage;
    if (target < 0) target = state.active_stage;
    if (target < 0) target = 0;
    if (target > last_stage) target = last_stage;

    int main_idx = resolve_main_idx(tables, state.main_source);
    if (main_idx < 0) {
        main_idx = 0;
    }

    if (!emit_stage0(state, tables, main_idx, result)) {
        return result;
    }
    int stage = 1;
    while (stage <= target) {
        if (!emit_stage_n(state.stages[(size_t)stage], stage, result)) {
            return result;
        }
        ++stage;
    }

    result.sql += "\nSELECT * FROM t" + std::to_string(target) + ";\n";
    return result;
}
} // namespace tql_to_sql
+41
View File
@@ -0,0 +1,41 @@
// tql_to_sql — Pure TQL State -> SQL DuckDB emitter + Lua subset transpiler.
// No DuckDB linked. Only string emit + validation.
// Promoted from primitives_gallery playground to registry, issue 0081.
// Depends only on core/data_table_types.h + core/tql_helpers.h.
#pragma once
#include "core/data_table_types.h"
#include <string>
#include <vector>
namespace tql_to_sql {
// Result of emit_sql: the generated SQL plus its side-channel information.
struct SqlEmit {
std::string sql; // DuckDB SELECT / CTE chain
std::vector<std::string> params; // positional bound values, one per `?` placeholder
std::vector<std::string> warnings; // soft issues (col not found, etc.)
std::string error; // if non-empty, emission failed
};
// Pure: emits DuckDB SQL equivalent to stages 0..active of the state.
// `tables` provides the schema (headers/types/name) of each TableInput. The
// caller is responsible for hydrating the tables in DuckDB under those names.
// `up_to_stage = -1` => state.active_stage.
SqlEmit emit_sql(const data_table::State& state,
const std::vector<data_table::TableInput>& tables,
int up_to_stage = -1);
// Pure: validates that `formula` (the Lua body of a derived col) lies within
// the SQL-transpilable subset. Returns true if valid; otherwise false, with
// the concrete reason in `error_out`.
bool is_transpilable(const std::string& formula, std::string& error_out);
// Pure: transpiles a Lua-subset formula into a SQL expression. If the formula
// is outside the subset, returns "" and fills `error_out`; on success
// `error_out` is empty.
// `in_headers` is intended for resolving `[col]` refs.
// NOTE(review): the current parser quotes `[col]` names directly and does not
// consult `in_headers` — confirm whether header validation is still planned.
std::string transpile_expr(const std::string& formula,
const std::vector<std::string>& in_headers,
std::string& error_out);
} // namespace tql_to_sql
+84
View File
@@ -0,0 +1,84 @@
---
name: tql_to_sql
kind: function
lang: cpp
domain: core
version: "1.0.0"
purity: pure
signature: "tql_to_sql::SqlEmit tql_to_sql::emit_sql(const data_table::State& state, const std::vector<data_table::TableInput>& tables, int up_to_stage = -1)"
description: "Transpiles a TQL State pipeline to DuckDB-compatible SQL (CTE chain). Also exposes transpile_expr and is_transpilable for validating Lua formula subsets. Pure: no DuckDB linked, no I/O."
tags: [tables, tql, sql, duckdb, cpp-tables]
uses_functions:
- tql_helpers_cpp_core
uses_types:
- data_table_types_cpp_core
returns: []
returns_optional: false
error_type: ""
imports:
- "core/tql_to_sql.h"
- "core/tql_helpers.h"
tested: true
tests:
- "transpile_expr: simple arithmetic"
- "transpile_expr: numeric literal"
- "transpile_expr: string literal"
- "transpile_expr: boolean literals"
- "transpile_expr: ternary if/then/else"
- "transpile_expr: string concat"
- "transpile_expr: whitelisted math function"
- "transpile_expr: forbidden keyword returns error"
- "transpile_expr: bare identifier not allowed"
- "is_transpilable: simple expression is transpilable"
- "is_transpilable: loop keyword not transpilable"
- "emit_sql: select simple single table"
- "emit_sql: filter eq produces WHERE with placeholder"
- "emit_sql: sort produces ORDER BY"
- "emit_sql: group by + sum aggregation produces correct SQL"
- "emit_sql: join two tables produces JOIN clause"
- "emit_sql: empty state returns error"
- "emit_sql: empty tables returns error"
- "emit_sql: contains filter produces LIKE with wildcards"
test_file_path: "cpp/tests/test_tql_to_sql.cpp"
file_path: "cpp/functions/core/tql_to_sql.cpp"
params:
- name: state
desc: "TQL State with stages (filters, derived cols, breakouts, aggregations, sorts) and joins. active_stage determines the last CTE emitted."
- name: tables
desc: "Schema and data for each TableInput. The emitter uses headers/name to build the SELECT list and JOIN clauses. Cells are not read (pure SQL generation)."
- name: up_to_stage
desc: "Emit SQL only up to this stage index. -1 = use state.active_stage."
output: "SqlEmit with: sql (WITH ... CTEs + SELECT * FROM tN), params (positional ? bound values), warnings (soft issues like missing join tables or untranspilable derived cols), error (non-empty on hard failure)."
---
## Ejemplo
```cpp
#include "core/tql_to_sql.h"
data_table::State st;
st.stages.push_back(data_table::Stage{});
data_table::Filter f{0, data_table::Op::Eq, "North"};
st.stages[0].filters.push_back(f);
data_table::TableInput ti;
ti.name = "sales";
ti.headers = {"region", "amount"};
auto e = tql_to_sql::emit_sql(st, {ti});
// e.sql -> "WITH t0 AS (\n SELECT \"region\", \"amount\"\n FROM \"sales\"\n WHERE \"region\" = ?\n)\nSELECT * FROM t0;\n"
// e.params -> {"North"}
```
## Cuando usarla
Cuando necesites exportar el estado TQL actual a SQL ejecutable en DuckDB. Llamala primero para generar el SQL y despues ejecuta ese SQL en DuckDB. El caller es responsable de hidratar (registrar) las tablas en DuckDB con los mismos nombres que `TableInput.name` antes de ejecutar la query.
## Gotchas
- Dialecto DuckDB: `date_trunc`, `quantile_cont`, `CAST(x AS DOUBLE)`. No compatible con SQLite sin adaptar.
- `transpile_expr` solo acepta el subset Lua: `[col]` refs, operadores aritmeticos/logicos/comparacion, `if/then/else/end`, funciones whitelisted (`math.*`, `string.upper/lower/len/sub`, `tostring`, `tonumber`). Closures, loops, `io.*`, `os.*` dan error.
- Filters de stages N>=1 no se emiten en v1 (pendiente `TODO v2`). Solo stage 0 tiene WHERE.
- Identifiers siempre quoted con `"double quotes"` — preserva case y permite `:` en nombres de breakout con granularity.
- Derived cols con formula fuera del subset se saltan con warning, no error.
- `resolve_main_idx` es static: si `main_source` no coincide con ninguna tabla, usa `tables[0]`.
+58 -4
View File
@@ -212,8 +212,9 @@ static void draw_cell_custom(const ColumnSpec& spec, const char* value,
col.w);
ImGui::PushStyleColor(ImGuiCol_HeaderHovered, hover_col);
ImGui::PushStyleColor(ImGuiCol_HeaderActive, hover_col);
ImGui::Selectable(label, false,
ImGuiSelectableFlags_SpanAllColumns);
// Issue 0081-O.7: removed SpanAllColumns — badge hover must not
// illuminate the entire row, only the badge cell.
ImGui::Selectable(label, false);
ImGui::PopStyleColor(3);
} else {
ImGui::TextUnformatted(label);
@@ -3087,6 +3088,11 @@ void render(const char* id,
ImGuiTableFlags flags =
ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg |
ImGuiTableFlags_Resizable | ImGuiTableFlags_ScrollY;
// Issue 0081-O.7/9: disable ALL Selectable bg painting. Hover + selection
// are painted via TableSetBgColor(CellBg, ...) below, edge-to-edge.
ImGui::PushStyleColor(ImGuiCol_HeaderHovered, ImVec4(0, 0, 0, 0));
ImGui::PushStyleColor(ImGuiCol_HeaderActive, ImVec4(0, 0, 0, 0));
ImGui::PushStyleColor(ImGuiCol_Header, ImVec4(0, 0, 0, 0));
if (ImGui::BeginTable(id, visible_cols, flags, ImVec2(0, 0))) {
for (int dc = 0; dc < (int)st.col_order.size(); ++dc) {
@@ -3203,6 +3209,14 @@ void render(const char* id,
if (c < 0 || c >= eff_cols) continue;
if (!st.col_visible[c]) continue;
ImGui::TableSetColumnIndex(draw_idx++);
// Issue 0081-O.7: capture cell rect before rendering for
// per-cell hover overlay (manual hit-test avoids relying on
// GetItemRect of the last sub-item inside draw_cell_custom).
ImVec2 cell_min = ImGui::GetCursorScreenPos();
float cell_w = ImGui::GetContentRegionAvail().x;
float cell_h = ImGui::GetTextLineHeight();
ImVec2 cell_max(cell_min.x + cell_w, cell_min.y + cell_h);
int src = src_for_eff[c];
std::string eval_buf;
const char* cell;
@@ -3251,8 +3265,15 @@ void render(const char* id,
}
}
if (!custom_rendered) {
// Issue 0081-O.8: disable Selectable's own bg paint so
// it doesn't double-up with the manual cell overlay below.
// Pass explicit size so empty cells still have a hit area
// (otherwise drag-select skips empty cells).
ImGui::PushStyleColor(ImGuiCol_HeaderHovered, ImVec4(0, 0, 0, 0));
ImGui::Selectable(cell ? cell : "", in_sel,
ImGuiSelectableFlags_AllowDoubleClick);
ImGuiSelectableFlags_AllowDoubleClick,
ImVec2(0, ImGui::GetTextLineHeight()));
ImGui::PopStyleColor();
// Tooltip for Text cells (Phase 2).
if (cell_cs && cell_cs->tooltip_on_hover &&
ImGui::IsItemHovered()) {
@@ -3263,6 +3284,27 @@ void render(const char* id,
}
}
}
// Issue 0081-O.7/9/10: per-cell bg via TableSetBgColor.
// Edge-to-edge (full cell incl. CellPadding). Selection (in_sel)
// overrides hover. Both colors uniform — no Selectable padding.
{
ImVec2 mp = ImGui::GetMousePos();
ImVec2 pad = ImGui::GetStyle().CellPadding;
bool is_hovered =
mp.x >= cell_min.x - pad.x && mp.x < cell_min.x + cell_w + pad.x &&
mp.y >= cell_min.y - pad.y && mp.y < cell_min.y + cell_h + pad.y &&
ImGui::IsWindowHovered(ImGuiHoveredFlags_AllowWhenBlockedByActiveItem);
if (in_sel) {
// Selected (drag-range): blue, slightly stronger when hovered.
ImGui::TableSetBgColor(ImGuiTableBgTarget_CellBg,
is_hovered
? IM_COL32(102, 140, 217, 80)
: IM_COL32(102, 140, 217, 60));
} else if (is_hovered) {
ImGui::TableSetBgColor(ImGuiTableBgTarget_CellBg,
IM_COL32(255, 255, 255, 22));
}
}
// AllowWhenBlockedByActiveItem: durante drag,
// otras celdas tambien reciben hover -> sel se
// pinta mientras arrastras.
@@ -3318,6 +3360,7 @@ void render(const char* id,
}
ImGui::EndTable();
}
ImGui::PopStyleColor(3); // HeaderHovered/HeaderActive/Header (issue 0081-O.7)
// Ctrl+C -> TSV.
if (U.sel_active && ImGui::GetIO().KeyCtrl &&
@@ -3747,6 +3790,10 @@ void render(const char* id,
ImGuiTableFlags flags =
ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg |
ImGuiTableFlags_Resizable | ImGuiTableFlags_ScrollY;
// Issue 0081-O.7: tone down row hover (default 0.31 alpha was too bright).
ImGui::PushStyleColor(ImGuiCol_HeaderHovered, ImVec4(1.0f, 1.0f, 1.0f, 0.05f));
ImGui::PushStyleColor(ImGuiCol_HeaderActive, ImVec4(1.0f, 1.0f, 1.0f, 0.08f));
ImGui::PushStyleColor(ImGuiCol_Header, ImVec4(0.40f, 0.55f, 0.85f, 0.20f));
if (cur_cols_n > 0 && ImGui::BeginTable(id, cur_cols_n, flags, ImVec2(0, 0))) {
for (int c = 0; c < cur_cols_n; ++c) {
ImGui::TableSetupColumn(cur_headers[c].c_str(),
@@ -3839,7 +3886,13 @@ void render(const char* id,
}
}
if (!custom_rendered) {
ImGui::Selectable(cell ? cell : "");
// Issue 0081-O.8: disable Selectable's bg paint to avoid
// double hover with the manual cell overlay; explicit size
// ensures empty cells have a hit area.
ImGui::PushStyleColor(ImGuiCol_HeaderHovered, ImVec4(0, 0, 0, 0));
ImGui::Selectable(cell ? cell : "", false, 0,
ImVec2(0, ImGui::GetTextLineHeight()));
ImGui::PopStyleColor();
// Tooltip for Text cells (Phase 2).
if (cell_cs2 && cell_cs2->tooltip_on_hover &&
ImGui::IsItemHovered()) {
@@ -3893,6 +3946,7 @@ void render(const char* id,
}
ImGui::EndTable();
}
ImGui::PopStyleColor(3); // HeaderHovered/HeaderActive/Header (issue 0081-O.7)
}
stage_n_table_end:;
+11 -1
View File
@@ -3,7 +3,7 @@ name: data_table
kind: function
lang: cpp
domain: viz
version: "1.3.1"
version: "1.3.6"
purity: impure
signature: "void data_table::render(const char* id, const std::vector<TableInput>& tables, State& st, std::vector<TableEvent>* events_out = nullptr, bool show_chrome = true)"
description: "Render UI completa de tabla TQL: chips bar, tabla, viz panels, column-stats inline, drill, color rules, joins, TQL editor, Ask AI, Button renderer, event sink (ButtonClick/RowDoubleClick/RowRightClick), tooltip per-cell, column_specs persisted in TQL. Dots renderer para sparkline-like de status (v1.3.0). Entry-point publica del stack data_table. Muta State segun interaccion del usuario."
@@ -186,5 +186,15 @@ v1.3.0 (2026-05-15) — Dots renderer for inline status timelines (sparkline-lik
v1.3.1 (2026-05-15) — Dots renderer now draws filled circles via ImDrawList instead of Unicode glyph. Font-independent: works regardless of TTF glyph coverage. Closes "dots show as ?" bug in dag_engine_ui.
v1.3.2 (2026-05-15) — Hover dimming: row uses muted alpha (0.05 vs default 0.31); hovered cell gets a subtle overlay (~9% white) via ImDrawList. Badge no longer SpanAllColumns. Closes "table-wide bright highlight on hover".
v1.3.3 (2026-05-15) — Selectable bg disabled for Text/empty cells (was duplicating with manual overlay → gray double-hover). Explicit ImVec2 size on Selectable so empty cells get a hit area (fixes drag-select skipping empties). Single uniform hover layer across all cell renderers.
v1.3.4 (2026-05-15) — Row height tightened: GetTextLineHeight() (no spacing) replaces GetTextLineHeightWithSpacing() in Selectable size + manual overlay rect. Removes inflated row vertical padding introduced in v1.3.3.
v1.3.5 (2026-05-15) — Cell hover paints via TableSetBgColor (covers entire cell bg including CellPadding) instead of manual AddRectFilled inside content area. Hit-test expanded by CellPadding for proper edge-to-edge coverage. Closes "hover has gap between cell borders".
v1.3.6 (2026-05-15) — Selection (drag-range) also paints via TableSetBgColor — same edge-to-edge coverage as hover. Header/HeaderHovered/HeaderActive colors set to fully transparent so Selectable doesn't paint anything; all cell bg states (hover, selected, selected+hover) go through TableSetBgColor uniformly.
---
Promovido desde `cpp/apps/primitives_gallery/playground/tables/data_table.{h,cpp}` — issue 0081-H.
+924
View File
@@ -0,0 +1,924 @@
#include "viz/viz_render.h"
#include "core/auto_detect_type.h" // parse_number
#include "implot.h"
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <string>
#include <unordered_map>
#include <vector>
namespace viz {
using data_table::StageOutput;
using data_table::ColumnType;
using data_table::ViewMode;
using data_table::ViewConfig;
using data_table::parse_number;
// Hit-test helpers (pure math; extracted from data_table_logic).
namespace {
// Index of the point (xs[i], ys[i]) closest to (tx, ty) by squared Euclidean
// distance; the first one wins on ties. Returns -1 when n <= 0.
static int nearest_index_2d(double tx, double ty,
                            const double* xs, const double* ys, int n) {
    int winner = -1;
    double winner_dist2 = 1e300;
    int i = 0;
    while (i < n) {
        const double off_x = xs[i] - tx;
        const double off_y = ys[i] - ty;
        const double dist2 = off_x * off_x + off_y * off_y;
        if (dist2 < winner_dist2) {
            winner_dist2 = dist2;
            winner = i;
        }
        ++i;
    }
    return winner;
}
// Angle in degrees, normalised to [0, 360), of point (mx, my) around center
// (cx, cy). It is the atan2 angle shifted by -90 degrees, so 0 corresponds to
// the +y direction from the center and the angle grows with atan2.
static double pie_angle(double cx, double cy, double mx, double my) {
    constexpr double kPI = 3.14159265358979323846;
    const double rel_x = mx - cx;
    const double rel_y = my - cy;
    const double degrees = std::atan2(rel_y, rel_x) * 180.0 / kPI;
    double wrapped = degrees - 90.0;
    // Fold into [0, 360).
    for (; wrapped < 0.0; wrapped += 360.0) {}
    for (; wrapped >= 360.0; wrapped -= 360.0) {}
    return wrapped;
}
// Slice index that contains `angle` (degrees) in a pie whose n slices sweep
// proportionally to `values`, laid out consecutively starting at 0 degrees.
// Returns -1 when n <= 0, 0 when the values sum to zero, and n-1 when no
// slice interval contains the angle.
static int pie_slice_at_angle(double angle, const double* values, int n) {
    if (n <= 0) {
        return -1;
    }
    double sum = 0.0;
    for (const double* p = values; p != values + n; ++p) {
        sum += *p;
    }
    if (sum == 0.0) {
        return 0;
    }
    double start = 0.0;
    int slice = 0;
    while (slice < n) {
        const double sweep = (values[slice] / sum) * 360.0;
        const bool inside = angle >= start && angle < start + sweep;
        if (inside) {
            return slice;
        }
        start += sweep;
        ++slice;
    }
    return n - 1;
}
// Maps plot coordinates (px, py) to a heatmap cell: column = floor(px),
// row = floor(py). When either index falls outside [0, cols) / [0, rows),
// both outputs are set to -1.
static void heatmap_cell_at(double px, double py, int rows, int cols,
                            int& row_out, int& col_out) {
    const int col = (int)std::floor(px);
    const int row = (int)std::floor(py);
    const bool row_ok = row >= 0 && row < rows;
    const bool col_ok = col >= 0 && col < cols;
    if (row_ok && col_ok) {
        row_out = row;
        col_out = col;
    } else {
        row_out = -1;
        col_out = -1;
    }
}
} // anon (hit-test helpers)
// Index of the first header equal to `name`, or -1 when `name` is empty or
// not present.
static int find_header(const StageOutput& out, const std::string& name) {
    if (name.empty()) {
        return -1;
    }
    size_t idx = 0;
    while (idx < out.headers.size()) {
        if (out.headers[idx] == name) {
            return (int)idx;
        }
        ++idx;
    }
    return -1;
}
// Column index for the configured X column (cfg.x_col), or `fallback` when
// that header is not found.
static int resolve_x(const StageOutput& out, const ViewConfig& cfg, int fallback) {
    const int found = find_header(out, cfg.x_col);
    if (found < 0) {
        return fallback;
    }
    return found;
}
// Column index for the configured category column (cfg.cat_col), or
// `fallback` when that header is not found.
static int resolve_cat(const StageOutput& out, const ViewConfig& cfg, int fallback) {
    const int found = find_header(out, cfg.cat_col);
    if (found < 0) {
        return fallback;
    }
    return found;
}
// Column index for the configured size column (cfg.size_col), or `fallback`
// when that header is not found.
static int resolve_size(const StageOutput& out, const ViewConfig& cfg, int fallback) {
    const int found = find_header(out, cfg.size_col);
    if (found < 0) {
        return fallback;
    }
    return found;
}
int first_numeric_col(const StageOutput& out) {
for (size_t c = 0; c < out.types.size(); ++c) {
if (out.types[c] == ColumnType::Int || out.types[c] == ColumnType::Float) return (int)c;
}
return -1;
}
int first_category_col(const StageOutput& out) {
for (size_t c = 0; c < out.types.size(); ++c) {
ColumnType t = out.types[c];
if (t == ColumnType::String || t == ColumnType::Date || t == ColumnType::Bool ||
t == ColumnType::Json) return (int)c;
}
return -1;
}
// Reads column `col` as doubles, one entry per row. Cells that are null,
// empty or rejected by parse_number become NaN. Returns an empty vector when
// `col` is out of range.
std::vector<double> extract_numeric(const StageOutput& out, int col) {
    std::vector<double> values;
    const bool col_ok = col >= 0 && col < out.cols;
    if (!col_ok) {
        return values;
    }
    values.reserve(out.rows);
    for (int row = 0; row < out.rows; ++row) {
        // Cells are addressed row-major.
        const char* text = out.cells[(size_t)row * out.cols + col];
        double parsed = 0;
        const bool ok = text && *text && parse_number(text, parsed);
        if (ok) {
            values.push_back(parsed);
        } else {
            values.push_back(std::nan(""));
        }
    }
    return values;
}
// Reads column `col` as strings, one entry per row; null cells become "".
// Returns an empty vector when `col` is out of range.
std::vector<std::string> extract_category(const StageOutput& out, int col) {
    std::vector<std::string> labels;
    const bool col_ok = col >= 0 && col < out.cols;
    if (!col_ok) {
        return labels;
    }
    labels.reserve(out.rows);
    for (int row = 0; row < out.rows; ++row) {
        const char* text = out.cells[(size_t)row * out.cols + col];
        if (text) {
            labels.emplace_back(text);
        } else {
            labels.emplace_back("");
        }
    }
    return labels;
}
namespace {
// One numeric column materialised for plotting. Kept as a plain aggregate so
// it can be brace-initialised as {idx, name, vals}.
struct NumCol {
    int idx;                   // column index within the StageOutput
    std::string name;          // header text of that column
    std::vector<double> vals;  // one value per row; NaN where the cell did not parse
};
std::vector<NumCol> collect_numeric(const StageOutput& out, int max_n = 16) {
std::vector<NumCol> r;
for (size_t c = 0; c < out.types.size() && (int)r.size() < max_n; ++c) {
if (out.types[c] == ColumnType::Int || out.types[c] == ColumnType::Float) {
NumCol nc;
nc.idx = (int)c;
nc.name = out.headers[c];
nc.vals = extract_numeric(out, (int)c);
r.push_back(std::move(nc));
}
}
return r;
}
std::vector<NumCol> collect_numeric_filtered(const StageOutput& out,
const ViewConfig& cfg,
int max_n = 16) {
if (cfg.y_cols.empty()) return collect_numeric(out, max_n);
std::vector<NumCol> r;
for (const auto& name : cfg.y_cols) {
if ((int)r.size() >= max_n) break;
int c = find_header(out, name);
if (c < 0) continue;
if (out.types[c] != ColumnType::Int && out.types[c] != ColumnType::Float) continue;
NumCol nc;
nc.idx = c;
nc.name = out.headers[c];
nc.vals = extract_numeric(out, c);
r.push_back(std::move(nc));
}
if (r.empty()) return collect_numeric(out, max_n);
return r;
}
// Builds an ImPlotSpec that uses `rgba_color` for both line and fill.
// A colour of 0 means "no override" and yields a default-constructed spec.
ImPlotSpec spec_with_color(unsigned int rgba_color) {
    if (rgba_color != 0) {
        ImU32 packed = (ImU32)rgba_color;
        return ImPlotSpec(ImPlotProp_LineColor, packed, ImPlotProp_FillColor, packed);
    }
    return ImPlotSpec();
}
// Axis flags helper. A locked view adds ImPlotAxisFlags_Lock to `base` (no
// pan/zoom); an unlocked view returns `base` untouched. No AutoFit flag is
// added, so the user's pan/zoom is preserved; the explicit re-fit goes through
// SetNextAxesToFit when fit_request is set.
ImPlotAxisFlags axflag(const ViewConfig& cfg, ImPlotAxisFlags base = 0) {
    return cfg.locked ? (base | ImPlotAxisFlags_Lock) : base;
}
// Call right before BeginPlot. When cfg.fit_request is set, asks ImPlot to fit
// the axes of the next plot and clears the flag on `cfg`.
void maybe_fit(const ViewConfig& cfg) {
    if (!cfg.fit_request) return;
    ImPlot::SetNextAxesToFit();
    cfg.fit_request = false;
}
void info_text(const char* msg) {
ImVec2 avail = ImGui::GetContentRegionAvail();
ImVec2 sz = ImGui::CalcTextSize(msg);
ImGui::SetCursorPos(ImVec2(ImGui::GetCursorPosX() + (avail.x - sz.x) * 0.5f,
ImGui::GetCursorPosY() + (avail.y - sz.y) * 0.5f));
ImGui::TextDisabled("%s", msg);
}
// Returns the finite entries of `v` in their original order, dropping NaN and
// +/-infinity.
std::vector<double> finite(const std::vector<double>& v) {
    std::vector<double> kept;
    kept.reserve(v.size());
    for (double d : v) {
        // isfinite rather than !isnan: strtod-based parsing accepts "inf", and
        // an infinite value must not reach the histogram as a finite sample.
        if (std::isfinite(d)) kept.push_back(d);
    }
    return kept;
}
// Renders the bar family: Bar (horizontal), Column (vertical), GroupedBar and
// StackedBar. Uses one category column for the tick labels and up to 8
// numeric columns as series (only the first one in Bar/Column mode).
// Returns false, after showing a hint, when the required columns are missing,
// and false when BeginPlot declines to draw.
// clicked_row_out (optional): in Bar/Column mode receives the row index of the
// tick nearest to a left click inside the plot; otherwise it is not written.
bool render_bar_like(const StageOutput& out, ViewMode mode,
                     const ViewConfig& cfg, ImVec2 size,
                     int* clicked_row_out = nullptr) {
    int cat_col = resolve_cat(out, cfg, first_category_col(out));
    auto nums = collect_numeric_filtered(out, cfg, 8);
    if (cat_col < 0 || nums.empty()) {
        info_text("Need 1 category + 1+ numeric columns");
        return false;
    }
    auto cats = extract_category(out, cat_col);
    int n = (int)cats.size();
    if (n == 0) { info_text("Empty data"); return false; }

    // One tick per row, labelled with that row's category text.
    std::vector<double> ticks(n);
    std::vector<const char*> labels(n);
    for (int i = 0; i < n; ++i) { ticks[i] = i; labels[i] = cats[i].c_str(); }

    const bool horiz = (mode == ViewMode::Bar);
    const bool multi_series =
        (mode == ViewMode::StackedBar || mode == ViewMode::GroupedBar);
    ImPlotFlags pflags = cfg.show_legend ? 0 : ImPlotFlags_NoLegend;
    maybe_fit(cfg);
    if (!ImPlot::BeginPlot("##bar", size, pflags)) return false;

    ImPlotAxisFlags ax_cat = axflag(cfg);
    ImPlotAxisFlags ax_num = axflag(cfg);
    if (horiz) {
        ImPlot::SetupAxes(out.headers[nums[0].idx].c_str(), out.headers[cat_col].c_str(),
                          ax_num, ax_cat);
        ImPlot::SetupAxisTicks(ImAxis_Y1, ticks.data(), n, labels.data(), false);
    } else {
        ImPlot::SetupAxes(out.headers[cat_col].c_str(), out.headers[nums[0].idx].c_str(),
                          ax_cat, ax_num);
        ImPlot::SetupAxisTicks(ImAxis_X1, ticks.data(), n, labels.data(), false);
    }

    if (multi_series) {
        // Flat matrix, one row of n values per series; NaN is plotted as 0.
        int items = (int)nums.size();
        std::vector<double> mat((size_t)items * n, 0.0);
        std::vector<const char*> series_labels(items);
        for (int it = 0; it < items; ++it) {
            series_labels[it] = nums[it].name.c_str();
            for (int g = 0; g < n; ++g) {
                double d = nums[it].vals[g];
                mat[(size_t)it * n + g] = std::isnan(d) ? 0.0 : d;
            }
        }
        // `horiz` is only true for ViewMode::Bar, so grouped/stacked bars are
        // always vertical and no Horizontal flag is ever needed here.
        int flags = (mode == ViewMode::StackedBar) ? ImPlotBarGroupsFlags_Stacked : 0;
        ImPlot::PlotBarGroups(series_labels.data(), mat.data(), items, n, 0.67, 0,
                              ImPlotSpec(ImPlotProp_Flags, flags));
    } else {
        // Single series: first numeric column, NaN plotted as 0.
        std::vector<double> ys(n);
        for (int i = 0; i < n; ++i) {
            double d = nums[0].vals[i];
            ys[i] = std::isnan(d) ? 0.0 : d;
        }
        if (horiz) {
            if (cfg.primary_color != 0) {
                ImU32 col = (ImU32)cfg.primary_color;
                ImPlot::PlotBars(nums[0].name.c_str(), ys.data(), ticks.data(), n, 0.67,
                                 ImPlotSpec(ImPlotProp_Flags, ImPlotBarsFlags_Horizontal,
                                            ImPlotProp_FillColor, col,
                                            ImPlotProp_LineColor, col));
            } else {
                ImPlot::PlotBars(nums[0].name.c_str(), ys.data(), ticks.data(), n, 0.67,
                                 ImPlotSpec(ImPlotProp_Flags, ImPlotBarsFlags_Horizontal));
            }
        } else {
            ImPlotSpec spc = spec_with_color(cfg.primary_color);
            ImPlot::PlotBars(nums[0].name.c_str(), ticks.data(), ys.data(), n, 0.67, spc);
        }
    }

    // Hit-test (single-series modes only): nearest tick to the click.
    if (clicked_row_out && !multi_series &&
        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
        ImPlotPoint p = ImPlot::GetPlotMousePos();
        const double target = horiz ? p.y : p.x;
        // floor(t + 0.5) rounds to the nearest tick for negative positions as
        // well; a plain (int) cast truncates toward zero and used to map
        // clicks in (-1.5, -0.5) onto row 0.
        const double nearest = std::floor(target + 0.5);
        if (nearest >= 0.0 && nearest < (double)n) *clicked_row_out = (int)nearest;
    }
    ImPlot::EndPlot();
    return true;
}
bool render_line_like(const StageOutput& out, ViewMode mode,
const ViewConfig& cfg, ImVec2 size) {
auto nums = collect_numeric_filtered(out, cfg, 8);
if (nums.empty()) { info_text("Need at least 1 numeric column"); return false; }
ImPlotFlags pflags = cfg.show_legend ? 0 : ImPlotFlags_NoLegend;
maybe_fit(cfg);
if (!ImPlot::BeginPlot("##line", size, pflags)) return false;
ImPlot::SetupAxes(nullptr, nullptr, axflag(cfg), axflag(cfg));
int n = nums.empty() ? 0 : (int)nums[0].vals.size();
if (n == 0) { ImPlot::EndPlot(); return false; }
// X column: cfg.x_col override; sino primer numeric si hay >=2; sino indices.
int x_idx = -1;
if (!cfg.x_col.empty()) {
int xc = find_header(out, cfg.x_col);
if (xc >= 0 && (out.types[xc] == ColumnType::Int || out.types[xc] == ColumnType::Float)) {
x_idx = xc;
}
}
std::vector<double> idx_xs;
const double* xs = nullptr;
int start_y = 0;
std::vector<double> x_data_external;
if (x_idx >= 0) {
x_data_external = extract_numeric(out, x_idx);
xs = x_data_external.data();
} else if (nums.size() >= 2 && cfg.y_cols.empty()) {
xs = nums[0].vals.data();
start_y = 1;
} else {
idx_xs.resize(n);
for (int i = 0; i < n; ++i) idx_xs[i] = i;
xs = idx_xs.data();
}
bool only_one = (cfg.primary_color != 0) && (nums.size() - start_y == 1);
for (size_t i = (size_t)start_y; i < nums.size(); ++i) {
const auto& nc = nums[i];
ImU32 col = only_one ? (ImU32)cfg.primary_color : 0;
int marker = cfg.show_markers ? ImPlotMarker_Circle : ImPlotMarker_None;
if (mode == ViewMode::Area) {
if (col) {
ImPlot::PlotShaded(nc.name.c_str(), xs, nc.vals.data(), (int)nc.vals.size(), 0.0,
ImPlotSpec(ImPlotProp_FillColor, col, ImPlotProp_LineColor, col));
} else {
ImPlot::PlotShaded(nc.name.c_str(), xs, nc.vals.data(), (int)nc.vals.size(), 0.0);
}
} else if (mode == ViewMode::Stairs) {
if (col) {
ImPlot::PlotStairs(nc.name.c_str(), xs, nc.vals.data(), (int)nc.vals.size(),
ImPlotSpec(ImPlotProp_LineColor, col));
} else {
ImPlot::PlotStairs(nc.name.c_str(), xs, nc.vals.data(), (int)nc.vals.size());
}
} else {
if (col) {
ImPlot::PlotLine(nc.name.c_str(), xs, nc.vals.data(), (int)nc.vals.size(),
ImPlotSpec(ImPlotProp_Flags, ImPlotLineFlags_SkipNaN,
ImPlotProp_LineColor, col,
ImPlotProp_Marker, marker));
} else {
ImPlot::PlotLine(nc.name.c_str(), xs, nc.vals.data(), (int)nc.vals.size(),
ImPlotSpec(ImPlotProp_Flags, ImPlotLineFlags_SkipNaN,
ImPlotProp_Marker, marker));
}
}
}
ImPlot::EndPlot();
return true;
}
bool render_scatter(const StageOutput& out, const ViewConfig& cfg, ImVec2 size,
int* clicked_row_out = nullptr) {
// Soporte cfg.x_col + cfg.y_cols[0]
int xc = find_header(out, cfg.x_col);
int yc = !cfg.y_cols.empty() ? find_header(out, cfg.y_cols[0]) : -1;
std::vector<NumCol> nums;
if (xc >= 0 && yc >= 0) {
NumCol a{xc, out.headers[xc], extract_numeric(out, xc)};
NumCol b{yc, out.headers[yc], extract_numeric(out, yc)};
nums = {a, b};
} else {
nums = collect_numeric(out, 4);
}
if (nums.size() < 2) { info_text("Need 2 numeric columns"); return false; }
maybe_fit(cfg);
if (!ImPlot::BeginPlot("##scatter", size, cfg.show_legend ? 0 : ImPlotFlags_NoLegend)) return false;
ImPlot::SetupAxes(nums[0].name.c_str(), nums[1].name.c_str(),
axflag(cfg), axflag(cfg));
if (cfg.primary_color) {
ImU32 col = (ImU32)cfg.primary_color;
ImPlot::PlotScatter("##s", nums[0].vals.data(), nums[1].vals.data(),
(int)nums[0].vals.size(),
ImPlotSpec(ImPlotProp_MarkerFillColor, col,
ImPlotProp_MarkerLineColor, col));
} else {
ImPlot::PlotScatter("##s", nums[0].vals.data(), nums[1].vals.data(),
(int)nums[0].vals.size());
}
if (clicked_row_out &&
ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
ImPlotPoint p = ImPlot::GetPlotMousePos();
int idx = nearest_index_2d(p.x, p.y,
nums[0].vals.data(), nums[1].vals.data(),
(int)nums[0].vals.size());
if (idx >= 0) *clicked_row_out = idx;
}
ImPlot::EndPlot();
return true;
}
bool render_bubble(const StageOutput& out, const ViewConfig& cfg, ImVec2 size,
int* clicked_row_out = nullptr) {
int xc = find_header(out, cfg.x_col);
int yc = !cfg.y_cols.empty() ? find_header(out, cfg.y_cols[0]) : -1;
int sc = resolve_size(out, cfg, -1);
std::vector<NumCol> nums;
if (xc >= 0 && yc >= 0 && sc >= 0) {
nums = {
{xc, out.headers[xc], extract_numeric(out, xc)},
{yc, out.headers[yc], extract_numeric(out, yc)},
{sc, out.headers[sc], extract_numeric(out, sc)},
};
} else {
nums = collect_numeric(out, 4);
}
if (nums.size() < 3) { info_text("Need 3 numeric columns (x, y, size)"); return false; }
maybe_fit(cfg);
if (!ImPlot::BeginPlot("##bubble", size, cfg.show_legend ? 0 : ImPlotFlags_NoLegend)) return false;
ImPlot::SetupAxes(nums[0].name.c_str(), nums[1].name.c_str(),
axflag(cfg), axflag(cfg));
ImPlot::PlotBubbles("##b", nums[0].vals.data(), nums[1].vals.data(),
nums[2].vals.data(), (int)nums[0].vals.size());
if (clicked_row_out &&
ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
ImPlotPoint p = ImPlot::GetPlotMousePos();
int idx = nearest_index_2d(p.x, p.y,
nums[0].vals.data(), nums[1].vals.data(),
(int)nums[0].vals.size());
if (idx >= 0) *clicked_row_out = idx;
}
ImPlot::EndPlot();
return true;
}
bool render_histogram(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
auto nums = collect_numeric_filtered(out, cfg, 4);
if (nums.empty()) { info_text("Need 1 numeric column"); return false; }
auto vals = finite(nums[0].vals);
if (vals.empty()) { info_text("No finite values"); return false; }
maybe_fit(cfg);
if (!ImPlot::BeginPlot("##hist", size, cfg.show_legend ? 0 : ImPlotFlags_NoLegend)) return false;
ImPlot::SetupAxes(nums[0].name.c_str(), "count",
axflag(cfg), axflag(cfg));
int bins = (cfg.hist_bins > 0) ? cfg.hist_bins : ImPlotBin_Sturges;
if (cfg.primary_color) {
ImU32 col = (ImU32)cfg.primary_color;
ImPlot::PlotHistogram("##h", vals.data(), (int)vals.size(), bins, 1.0,
ImPlotRange(),
ImPlotSpec(ImPlotProp_FillColor, col,
ImPlotProp_LineColor, col));
} else {
ImPlot::PlotHistogram("##h", vals.data(), (int)vals.size(), bins);
}
ImPlot::EndPlot();
return true;
}
bool render_hist2d(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
int xc = find_header(out, cfg.x_col);
int yc = !cfg.y_cols.empty() ? find_header(out, cfg.y_cols[0]) : -1;
std::vector<NumCol> nums;
if (xc >= 0 && yc >= 0) {
nums = {
{xc, out.headers[xc], extract_numeric(out, xc)},
{yc, out.headers[yc], extract_numeric(out, yc)},
};
} else {
nums = collect_numeric(out, 2);
}
if (nums.size() < 2) { info_text("Need 2 numeric columns"); return false; }
maybe_fit(cfg);
if (!ImPlot::BeginPlot("##hist2d", size, cfg.show_legend ? 0 : ImPlotFlags_NoLegend)) return false;
ImPlot::SetupAxes(nums[0].name.c_str(), nums[1].name.c_str());
int bins = (cfg.hist_bins > 0) ? cfg.hist_bins : ImPlotBin_Sturges;
ImPlot::PlotHistogram2D("##h2", nums[0].vals.data(), nums[1].vals.data(),
(int)nums[0].vals.size(), bins, bins);
ImPlot::EndPlot();
return true;
}
bool render_heatmap(const StageOutput& out, const ViewConfig& cfg, ImVec2 size,
int* clicked_row_out = nullptr) {
auto nums = collect_numeric_filtered(out, cfg, 64);
if (nums.empty()) { info_text("Need numeric columns"); return false; }
int cols = (int)nums.size();
int rows = (int)nums[0].vals.size();
if (rows == 0) { info_text("No rows"); return false; }
std::vector<double> mat((size_t)rows * cols, 0.0);
double mn = +1e300, mx = -1e300;
for (int c = 0; c < cols; ++c) {
for (int r = 0; r < rows; ++r) {
double d = nums[c].vals[r];
if (std::isnan(d)) d = 0;
mat[(size_t)r * cols + c] = d;
if (d < mn) mn = d; if (d > mx) mx = d;
}
}
if (mn == mx) { mx = mn + 1; }
maybe_fit(cfg);
if (!ImPlot::BeginPlot("##heatmap", size, 0)) return false;
ImPlot::PlotHeatmap("##hm", mat.data(), rows, cols, mn, mx, nullptr);
if (clicked_row_out &&
ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
ImPlotPoint p = ImPlot::GetPlotMousePos();
// ImPlot heatmap Y se pinta de top a bottom; plot mouse_y va igual
// (default scale 0..rows). Mapeo directo.
int rr, cc;
heatmap_cell_at(p.x, p.y, rows, cols, rr, cc);
if (rr >= 0) *clicked_row_out = rr;
(void)cc;
}
ImPlot::EndPlot();
return true;
}
// Pie / donut chart: one slice per row, labelled by the category column and
// sized by |value| of the first selected numeric column (NaN counts as 0).
// The donut variant currently draws the same filled pie (no inner hole is
// rendered); it differs only in the default radius and in the hit-test, which
// ignores clicks closer to the centre than half the radius.
// clicked_row_out (optional): receives the slice index reported by
// pie_slice_at_angle for a left click inside the ring, when that index is >= 0.
bool render_pie(const StageOutput& out, const ViewConfig& cfg, bool donut, ImVec2 size,
                int* clicked_row_out = nullptr) {
    const int cat = resolve_cat(out, cfg, first_category_col(out));
    auto nums = collect_numeric_filtered(out, cfg, 1);
    if (cat < 0 || nums.empty()) { info_text("Need 1 category + 1 numeric"); return false; }
    auto cats = extract_category(out, cat);
    const int n = std::min((int)cats.size(), (int)nums[0].vals.size());
    if (n == 0) return false;

    std::vector<double> values;
    std::vector<const char*> labels;
    values.reserve(n);
    labels.reserve(n);
    for (int i = 0; i < n; ++i) {
        const double d = nums[0].vals[i];
        values.push_back(std::isnan(d) ? 0.0 : std::abs(d));
        labels.push_back(cats[i].c_str());
    }

    ImPlotFlags pf = ImPlotFlags_Equal;
    if (!cfg.show_legend) pf |= ImPlotFlags_NoLegend;
    maybe_fit(cfg);
    if (!ImPlot::BeginPlot("##pie", size, pf)) return false;
    ImPlot::SetupAxes(nullptr, nullptr, ImPlotAxisFlags_NoDecorations,
                      ImPlotAxisFlags_NoDecorations);
    ImPlot::SetupAxesLimits(0, 1, 0, 1, ImPlotCond_Always);

    // Explicit cfg.pie_radius wins; otherwise a per-variant default.
    double radius = donut ? 0.4 : 0.45;
    if (cfg.pie_radius > 0) radius = cfg.pie_radius;
    ImPlot::PlotPieChart(labels.data(), values.data(), n, 0.5, 0.5, radius, "%.1f");

    const bool clicked = clicked_row_out &&
                         ImPlot::IsPlotHovered() &&
                         ImGui::IsMouseClicked(ImGuiMouseButton_Left);
    if (clicked) {
        ImPlotPoint p = ImPlot::GetPlotMousePos();
        const double dx = p.x - 0.5;
        const double dy = p.y - 0.5;
        const double dist2 = dx*dx + dy*dy;
        const double inner = donut ? (radius * 0.5) : 0.0;
        const bool inside_ring = dist2 <= radius * radius && dist2 >= inner * inner;
        if (inside_ring) {
            const double ang = pie_angle(0.5, 0.5, p.x, p.y);
            const int idx = pie_slice_at_angle(ang, values.data(), n);
            if (idx >= 0) *clicked_row_out = idx;
        }
    }
    ImPlot::EndPlot();
    return true;
}
// Funnel: horizontal bars sorted by value, largest at the top. NaN values
// sort last and are drawn as 0.
// clicked_row_out (optional): receives the ORIGINAL row index (before
// sorting) of the bar nearest to a left click inside the plot.
bool render_funnel(const StageOutput& out, const ViewConfig& cfg, ImVec2 size,
                   int* clicked_row_out = nullptr) {
    int cat = resolve_cat(out, cfg, first_category_col(out));
    auto nums = collect_numeric_filtered(out, cfg, 1);
    if (cat < 0 || nums.empty()) { info_text("Need 1 category + 1 numeric"); return false; }
    auto cats = extract_category(out, cat);
    int n = std::min((int)cats.size(), (int)nums[0].vals.size());
    if (n == 0) return false;

    // idx[i] = original row shown at sorted position i (descending by value).
    std::vector<int> idx(n);
    for (int i = 0; i < n; ++i) idx[i] = i;
    std::sort(idx.begin(), idx.end(), [&](int a, int b) {
        double da = std::isnan(nums[0].vals[a]) ? -1e300 : nums[0].vals[a];
        double db = std::isnan(nums[0].vals[b]) ? -1e300 : nums[0].vals[b];
        return da > db;
    });

    std::vector<double> ys(n);
    std::vector<double> ticks(n);
    std::vector<const char*> labels(n);
    for (int i = 0; i < n; ++i) {
        double d = nums[0].vals[idx[i]];
        ys[i] = std::isnan(d) ? 0 : d;
        ticks[i] = n - 1 - i;  // sorted position 0 sits on the highest tick
        // `cats` outlives the plot calls, so the labels can point straight
        // into it; no per-label copy is needed.
        labels[i] = cats[idx[i]].c_str();
    }

    maybe_fit(cfg);
    if (!ImPlot::BeginPlot("##funnel", size, 0)) return false;
    ImPlot::SetupAxes(nums[0].name.c_str(), out.headers[cat].c_str(),
                      axflag(cfg), axflag(cfg));
    ImPlot::SetupAxisTicks(ImAxis_Y1, ticks.data(), n, labels.data(), false);
    ImPlot::PlotBars(nums[0].name.c_str(), ys.data(), ticks.data(), n, 0.85,
                     ImPlotSpec(ImPlotProp_Flags, ImPlotBarsFlags_Horizontal));

    if (clicked_row_out &&
        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
        ImPlotPoint p = ImPlot::GetPlotMousePos();
        // floor(y + 0.5) rounds to the nearest tick for negative y as well; a
        // plain (int) cast truncates toward zero, so clicks in (-1.5, -0.5)
        // used to resolve to tick 0, i.e. the bottom bar.
        const double tick = std::floor(p.y + 0.5);
        if (tick >= 0.0 && tick < (double)n) {
            // ticks[i] = n-1-i, so invert to get the sorted position, then map
            // back to the original row.
            const int sorted_pos = (n - 1) - (int)tick;
            *clicked_row_out = idx[sorted_pos];
        }
    }
    ImPlot::EndPlot();
    return true;
}
// Waterfall: running total of the first selected numeric column (NaN counts
// as 0). Each step is drawn as an error bar spanning the previous and the new
// cumulative value, with a line through the cumulative values on top.
// When a category column is available its values label the X ticks.
bool render_waterfall(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
    auto nums = collect_numeric_filtered(out, cfg, 1);
    if (nums.empty()) { info_text("Need 1 numeric column"); return false; }
    const std::vector<double>& deltas = nums[0].vals;
    const int n = (int)deltas.size();
    if (n == 0) return false;

    const int cat = resolve_cat(out, cfg, first_category_col(out));
    std::vector<std::string> cats;
    if (cat >= 0) cats = extract_category(out, cat);

    // running[i] = total before step i; running[i + 1] = total after it.
    std::vector<double> running(n + 1, 0.0);
    std::vector<double> ticks(n);
    std::vector<const char*> labels(n);
    const int named = (int)cats.size();
    for (int i = 0; i < n; ++i) {
        const double step = std::isnan(deltas[i]) ? 0 : deltas[i];
        running[i + 1] = running[i] + step;
        ticks[i] = i;
        labels[i] = (i < named) ? cats[i].c_str() : "";
    }

    maybe_fit(cfg);
    if (!ImPlot::BeginPlot("##waterfall", size, 0)) return false;
    ImPlot::SetupAxes(nullptr, nums[0].name.c_str(),
                      axflag(cfg), axflag(cfg));
    if (cat >= 0) ImPlot::SetupAxisTicks(ImAxis_X1, ticks.data(), n, labels.data(), false);

    // Error-bar trick: centre each bar between the two totals, half-height =
    // half the absolute step.
    std::vector<double> mid(n), err(n);
    for (int i = 0; i < n; ++i) {
        const double before = running[i];
        const double after = running[i + 1];
        mid[i] = (before + after) * 0.5;
        err[i] = std::abs((after - before) * 0.5);
    }
    ImPlot::PlotErrorBars("##wf", ticks.data(), mid.data(), err.data(), n);
    ImPlot::PlotLine("cum", running.data() + 1, n);
    ImPlot::EndPlot();
    return true;
}
bool render_kpi_single(const StageOutput& out, const ViewConfig& cfg) {
int nc = !cfg.y_cols.empty() ? find_header(out, cfg.y_cols[0]) : -1;
if (nc < 0) nc = first_numeric_col(out);
if (nc < 0) { info_text("Need 1 numeric column"); return false; }
auto vals = extract_numeric(out, nc);
if (vals.empty()) { info_text("Empty"); return false; }
double last = std::nan("");
for (auto v : vals) if (!std::isnan(v)) last = v;
if (std::isnan(last)) { info_text("No finite values"); return false; }
char buf[64];
if (std::abs(last) >= 1e6) std::snprintf(buf, sizeof(buf), "%.2fM", last / 1e6);
else if (std::abs(last) >= 1e3) std::snprintf(buf, sizeof(buf), "%.2fK", last / 1e3);
else std::snprintf(buf, sizeof(buf), "%.3g", last);
ImVec2 avail = ImGui::GetContentRegionAvail();
ImGui::SetWindowFontScale(4.0f);
ImVec2 sz = ImGui::CalcTextSize(buf);
ImGui::SetCursorPos(ImVec2(ImGui::GetCursorPosX() + (avail.x - sz.x) * 0.5f,
ImGui::GetCursorPosY() + (avail.y - sz.y) * 0.5f - 20));
ImGui::TextUnformatted(buf);
ImGui::SetWindowFontScale(1.0f);
sz = ImGui::CalcTextSize(out.headers[nc].c_str());
ImGui::SetCursorPos(ImVec2(ImGui::GetCursorPosX() + (avail.x - sz.x) * 0.5f,
ImGui::GetCursorPosY() + (avail.y - sz.y) * 0.5f - 10));
ImGui::TextDisabled("%s", out.headers[nc].c_str());
return true;
}
// KPI grid: one 210x100 card per numeric column (up to 12), each showing the
// column name and its last non-NaN value (0 when there is none). Cards wrap
// according to the available width, assuming 220 px per card.
bool render_kpi_grid(const StageOutput& out, const ViewConfig& cfg) {
    auto nums = collect_numeric_filtered(out, cfg, 12);
    if (nums.empty()) { info_text("Need numeric columns"); return false; }
    ImVec2 avail = ImGui::GetContentRegionAvail();
    int per_row = std::max(1, (int)(avail.x / 220));
    const int total = (int)nums.size();
    int idx = 0;
    for (auto& nc : nums) {
        double last = std::nan("");
        for (auto v : nc.vals) if (!std::isnan(v)) last = v;
        if (std::isnan(last)) last = 0;
        char buf[64];
        if (std::abs(last) >= 1e6)      std::snprintf(buf, sizeof(buf), "%.2fM", last / 1e6);
        else if (std::abs(last) >= 1e3) std::snprintf(buf, sizeof(buf), "%.2fK", last / 1e3);
        else                            std::snprintf(buf, sizeof(buf), "%.4g", last);
        ImGui::BeginChild((ImGuiID)(0x1000 + idx), ImVec2(210, 100), true);
        ImGui::TextDisabled("%s", nc.name.c_str());
        ImGui::SetWindowFontScale(2.4f);
        ImGui::TextUnformatted(buf);
        ImGui::SetWindowFontScale(1.0f);
        ImGui::EndChild();
        // Keep the next card on the same row unless this one ends the row.
        // Never after the last card: a trailing SameLine() would leave the
        // caller's next widget on the card row.
        const bool row_full = (idx % per_row) == (per_row - 1);
        const bool last_card = (idx + 1 == total);
        if (!row_full && !last_card) ImGui::SameLine();
        idx++;
    }
    return true;
}
bool render_stem(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
auto nums = collect_numeric_filtered(out, cfg, 1);
if (nums.empty()) { info_text("Need 1 numeric column"); return false; }
int n = (int)nums[0].vals.size();
std::vector<double> xs(n); for (int i = 0; i < n; ++i) xs[i] = i;
maybe_fit(cfg);
if (!ImPlot::BeginPlot("##stem", size, 0)) return false;
ImPlot::SetupAxes(nullptr, nums[0].name.c_str(),
axflag(cfg), axflag(cfg));
ImPlot::PlotStems(nums[0].name.c_str(), xs.data(), nums[0].vals.data(), n);
ImPlot::EndPlot();
return true;
}
bool render_errorbars(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
auto nums = collect_numeric_filtered(out, cfg, 4);
if (nums.size() < 2) { info_text("Need 2 numeric columns (value, err)"); return false; }
int n = (int)nums[0].vals.size();
std::vector<double> xs(n); for (int i = 0; i < n; ++i) xs[i] = i;
maybe_fit(cfg);
if (!ImPlot::BeginPlot("##eb", size, 0)) return false;
ImPlot::SetupAxes(nullptr, nums[0].name.c_str(),
axflag(cfg), axflag(cfg));
ImPlot::PlotErrorBars(nums[0].name.c_str(), xs.data(),
nums[0].vals.data(), nums[1].vals.data(), n);
ImPlot::PlotScatter("##s", xs.data(), nums[0].vals.data(), n);
ImPlot::EndPlot();
return true;
}
// BoxPlot: groups the first selected numeric column by category and computes
// min / p25 / p50 / p75 / max per group (NaN values are ignored; a group with
// no values gets all zeros). Each group is drawn as a min..max line (whisker),
// an error bar spanning p25..p75 (box) and a scatter marker at p50.
// Groups appear in order of first occurrence.
bool render_boxplot(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
    int cat = resolve_cat(out, cfg, first_category_col(out));
    auto nums = collect_numeric_filtered(out, cfg, 1);
    if (cat < 0 || nums.empty()) { info_text("Need 1 category + 1 numeric"); return false; }
    auto cats = extract_category(out, cat);
    int n = std::min((int)cats.size(), (int)nums[0].vals.size());
    if (n == 0) return false;

    // Group values by category.
    std::unordered_map<std::string, std::vector<double>> groups;
    std::vector<std::string> order;
    for (int i = 0; i < n; ++i) {
        // Register the group the first time its category is seen, even if this
        // row's value is NaN. Registering only on the first non-NaN value made
        // every earlier NaN row append the same category to `order` again,
        // producing duplicate boxes.
        auto slot = groups.emplace(cats[i], std::vector<double>());
        if (slot.second) order.push_back(cats[i]);
        double d = nums[0].vals[i];
        if (!std::isnan(d)) slot.first->second.push_back(d);
    }
    int G = (int)order.size();
    if (G == 0) return false;

    std::vector<double> mn(G), p25(G), p50(G), p75(G), mx(G), xs(G);
    std::vector<const char*> labels(G);
    for (int g = 0; g < G; ++g) {
        auto& v = groups[order[g]];
        std::sort(v.begin(), v.end());
        int N = (int)v.size();
        xs[g] = g;
        labels[g] = order[g].c_str();
        if (N == 0) { mn[g] = p25[g] = p50[g] = p75[g] = mx[g] = 0; continue; }
        mn[g] = v.front();
        mx[g] = v.back();
        // Percentiles by direct index into the sorted values (no interpolation).
        p25[g] = v[std::min(N - 1, (int)(N * 0.25))];
        p50[g] = v[std::min(N - 1, (int)(N * 0.50))];
        p75[g] = v[std::min(N - 1, (int)(N * 0.75))];
    }

    maybe_fit(cfg);
    if (!ImPlot::BeginPlot("##box", size, 0)) return false;
    ImPlot::SetupAxes(out.headers[cat].c_str(), nums[0].name.c_str(),
                      axflag(cfg), axflag(cfg));
    ImPlot::SetupAxisTicks(ImAxis_X1, xs.data(), G, labels.data(), false);

    // Whiskers: vertical line from min to max.
    for (int g = 0; g < G; ++g) {
        double lo[2] = { mn[g], mx[g] };
        double xx[2] = { xs[g], xs[g] };
        ImPlot::PlotLine("##wh", xx, lo, 2);
    }

    // Box: error bar centred between p25 and p75, reaching both.
    std::vector<double> mid(G), half(G);
    for (int g = 0; g < G; ++g) {
        mid[g] = (p25[g] + p75[g]) * 0.5;
        half[g] = (p75[g] - p25[g]) * 0.5;
    }
    ImPlot::PlotErrorBars("box", xs.data(), mid.data(), half.data(), G);
    ImPlot::PlotScatter("median", xs.data(), p50.data(), G);
    ImPlot::EndPlot();
    return true;
}
// Candlestick against the row index. Takes the first four selected numeric
// columns as Open / High / Low / Close, in that order. Each row is drawn as a
// Low..High line (wick); the bodies are drawn as error bars spanning
// min(O, C)..max(O, C).
bool render_candlestick(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
    auto nums = collect_numeric_filtered(out, cfg, 8);
    if (nums.size() < 4) { info_text("Need 4 numeric columns: O/H/L/C"); return false; }
    const int n = (int)nums[0].vals.size();

    maybe_fit(cfg);
    if (!ImPlot::BeginPlot("##candle", size, 0)) return false;
    ImPlot::SetupAxes("t", "price", axflag(cfg), axflag(cfg));

    const std::vector<double>& open_v  = nums[0].vals;
    const std::vector<double>& high_v  = nums[1].vals;
    const std::vector<double>& low_v   = nums[2].vals;
    const std::vector<double>& close_v = nums[3].vals;

    // X positions are simply 0..n-1.
    std::vector<double> xs(n);
    for (int i = 0; i < n; ++i) xs[i] = i;

    // Wicks: one two-point vertical line per row.
    for (int i = 0; i < n; ++i) {
        double wick_x[2] = { xs[i], xs[i] };
        double wick_y[2] = { low_v[i], high_v[i] };
        ImPlot::PlotLine("##wick", wick_x, wick_y, 2);
    }

    // Bodies: centre and half-height of the open/close span.
    std::vector<double> body_mid(n), body_err(n);
    for (int i = 0; i < n; ++i) {
        const double lo = std::min(open_v[i], close_v[i]);
        const double hi = std::max(open_v[i], close_v[i]);
        body_mid[i] = (lo + hi) * 0.5;
        body_err[i] = (hi - lo) * 0.5;
    }
    ImPlot::PlotErrorBars("OHLC", xs.data(), body_mid.data(), body_err.data(), n);
    ImPlot::EndPlot();
    return true;
}
bool render_radar(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
auto nums = collect_numeric_filtered(out, cfg, 12);
if (nums.size() < 3) { info_text("Need 3+ numeric columns"); return false; }
int K = (int)nums.size();
int n = (int)nums[0].vals.size();
if (n == 0) return false;
// Take first row as the polygon.
std::vector<double> xs(K + 1), ys(K + 1);
double radius_norm = 0;
for (int k = 0; k < K; ++k) {
double d = nums[k].vals[0];
if (std::isnan(d)) d = 0;
radius_norm = std::max(radius_norm, std::abs(d));
}
if (radius_norm == 0) radius_norm = 1;
for (int k = 0; k < K; ++k) {
double v = nums[k].vals[0]; if (std::isnan(v)) v = 0;
double angle = 2 * 3.14159265358979 * k / K - 3.14159265358979 / 2;
double r = v / radius_norm;
xs[k] = std::cos(angle) * r;
ys[k] = std::sin(angle) * r;
}
xs[K] = xs[0]; ys[K] = ys[0];
maybe_fit(cfg);
if (!ImPlot::BeginPlot("##radar", size,
ImPlotFlags_Equal | ImPlotFlags_NoLegend)) return false;
ImPlot::SetupAxesLimits(-1.2, 1.2, -1.2, 1.2, ImPlotCond_Always);
ImPlot::SetupAxes(nullptr, nullptr, ImPlotAxisFlags_NoDecorations,
ImPlotAxisFlags_NoDecorations);
// Grid rings
for (double rr : {0.25, 0.5, 0.75, 1.0}) {
double gx[64], gy[64];
for (int i = 0; i < 64; ++i) {
double a = 2 * 3.14159265358979 * i / 63;
gx[i] = std::cos(a) * rr; gy[i] = std::sin(a) * rr;
}
ImPlot::PlotLine("##grid", gx, gy, 64);
}
ImPlot::PlotLine("radar", xs.data(), ys.data(), K + 1);
// Axis labels
for (int k = 0; k < K; ++k) {
double a = 2 * 3.14159265358979 * k / K - 3.14159265358979 / 2;
ImPlot::PlotText(nums[k].name.c_str(), std::cos(a) * 1.1, std::sin(a) * 1.1);
}
ImPlot::EndPlot();
return true;
}
} // anon
bool render(const StageOutput& out, ViewMode mode,
const ViewConfig& cfg, ImVec2 size,
int* clicked_row_out) {
if (clicked_row_out) *clicked_row_out = -1;
if (out.rows == 0 || out.cols == 0) {
info_text("No data");
return false;
}
switch (mode) {
case ViewMode::Table: return false;
case ViewMode::Bar:
case ViewMode::Column:
case ViewMode::GroupedBar:
case ViewMode::StackedBar: return render_bar_like(out, mode, cfg, size, clicked_row_out);
case ViewMode::Line:
case ViewMode::Area:
case ViewMode::Stairs: return render_line_like(out, mode, cfg, size);
case ViewMode::Scatter: return render_scatter(out, cfg, size, clicked_row_out);
case ViewMode::Bubble: return render_bubble(out, cfg, size, clicked_row_out);
case ViewMode::Histogram: return render_histogram(out, cfg, size);
case ViewMode::Histogram2D: return render_hist2d(out, cfg, size);
case ViewMode::Heatmap: return render_heatmap(out, cfg, size, clicked_row_out);
case ViewMode::BoxPlot: return render_boxplot(out, cfg, size);
case ViewMode::Stem: return render_stem(out, cfg, size);
case ViewMode::ErrorBars: return render_errorbars(out, cfg, size);
case ViewMode::Pie: return render_pie(out, cfg, false, size, clicked_row_out);
case ViewMode::Donut: return render_pie(out, cfg, true, size, clicked_row_out);
case ViewMode::Funnel: return render_funnel(out, cfg, size, clicked_row_out);
case ViewMode::Waterfall: return render_waterfall(out, cfg, size);
case ViewMode::KPI: return render_kpi_single(out, cfg);
case ViewMode::KPIGrid: return render_kpi_grid(out, cfg);
case ViewMode::Candlestick: return render_candlestick(out, cfg, size);
case ViewMode::Radar: return render_radar(out, cfg, size);
}
return false;
}
} // namespace viz
+42
View File
@@ -0,0 +1,42 @@
// viz_render — dispatcher de visualizaciones ImPlot sobre StageOutput.
// Cada modo elige automaticamente las columnas relevantes (primera categorica,
// primera o varias numericas) salvo override desde ViewConfig.
// Issue 0081-G. Promovido desde cpp/apps/primitives_gallery/playground/tables/viz.h
#pragma once
#include "core/data_table_types.h"
#include "imgui.h"
#include <vector>
#include <string>
namespace viz {
// Main entry point. Returns true when the requested mode was rendered, false
// when its preconditions are not met (missing numeric/categorical columns,
// etc.).
//
// `size`: ImVec2(-1,-1) uses all the available space.
// `out`: output of the active stage (headers, types, flat row-major cells).
// `clicked_row_out`: when != nullptr, the renderer writes the index of the
// `StageOutput` row clicked by the user; -1 when there was no drillable click.
// Phase 10 (issue 0079): enabled for bar/column/pie/donut/funnel/scatter/
// bubble/heatmap. Other modes perform no hit-test and leave it at -1.
bool render(const data_table::StageOutput& out,
data_table::ViewMode mode,
const data_table::ViewConfig& cfg,
ImVec2 size = ImVec2(-1, -1),
int* clicked_row_out = nullptr);
// Exposed helper: index of the first numeric column, or -1 if there is none.
int first_numeric_col(const data_table::StageOutput& out);
// Helper: index of the first categorical column (String/Date/Bool/Json), or
// -1 if there is none.
// NOTE(review): this comment used to also mention "Int with few unique
// values" as a heuristic; the implementation only checks the four types
// listed above — confirm whether that heuristic is still planned.
int first_category_col(const data_table::StageOutput& out);
// Helper: extracts a column as vector<double>. Unparseable cells become NaN.
std::vector<double> extract_numeric(const data_table::StageOutput& out, int col);
// Helper: extracts a column as vector<string> (categories).
std::vector<std::string> extract_category(const data_table::StageOutput& out, int col);
} // namespace viz
+84
View File
@@ -0,0 +1,84 @@
---
name: viz_render
kind: function
lang: cpp
domain: viz
version: "1.0.0"
purity: impure
signature: "bool viz::render(const data_table::StageOutput& out, data_table::ViewMode mode, const data_table::ViewConfig& cfg, ImVec2 size = ImVec2(-1,-1), int* clicked_row_out = nullptr)"
description: "Dispatcher de visualizaciones ImPlot sobre StageOutput. Cada modo (bar/column/pie/donut/funnel/scatter/bubble/heatmap/line/area/stacked/etc.) elige automaticamente columnas relevantes salvo override desde ViewConfig. Hit-test devuelve clicked_row_out para drill-down en los modos que lo soportan."
tags: [tables, viz, implot, tql, cpp-tables, dispatcher, drilldown]
uses_functions: []
uses_types:
- data_table_types_cpp_core
returns: []
returns_optional: false
error_type: "error_go_core"
imports: [imgui, implot]
tested: true
tests:
- "first_numeric_col returns -1 on empty output"
- "first_numeric_col returns 0 for all-numeric output"
- "first_numeric_col skips string columns"
- "first_category_col returns -1 on all-numeric output"
- "first_category_col returns first string column"
- "extract_numeric returns NaN for unparseable cells"
- "extract_numeric returns empty for out-of-range col"
- "extract_category returns empty strings for null cells"
- "extract_category returns empty for out-of-range col"
test_file_path: "cpp/tests/test_viz_render.cpp"
file_path: "cpp/functions/viz/viz_render.cpp"
framework: imgui
params:
- name: out
desc: "StageOutput del stage activo: headers, types, cells row-major (output de compute_stage o compute_pipeline)"
- name: mode
desc: "ViewMode a renderizar: Bar, Column, GroupedBar, StackedBar, Line, Area, Stairs, Scatter, Bubble, Histogram, Histogram2D, Heatmap, BoxPlot, Stem, ErrorBars, Pie, Donut, Funnel, Waterfall, KPI, KPIGrid, Candlestick, Radar"
- name: cfg
desc: "ViewConfig con overrides de auto-detect: x_col, y_cols, cat_col, size_col, primary_color, hist_bins, pie_radius, show_legend, show_markers, locked, fit_request"
- name: size
desc: "Tamano en pixeles del plot. ImVec2(-1,-1) usa todo el espacio disponible del contenedor ImGui"
- name: clicked_row_out
desc: "Output param: si != nullptr y el usuario clico un punto drillable, se escribe el indice de row en StageOutput. -1 si no hubo click. Solo activo en: Bar, Column, Pie, Donut, Funnel, Scatter, Bubble, Heatmap"
output: "true si el modo fue renderizado correctamente; false si faltan columnas requeridas (se muestra texto de ayuda centrado) o si out esta vacio"
---
## Ejemplo
```cpp
#include "viz/viz_render.h"
// Construir StageOutput trivial (3 filas, 2 cols: categoria + numerica)
data_table::StageOutput out;
out.headers = {"categoria", "valor"};
out.types = {data_table::ColumnType::String, data_table::ColumnType::Float};
out.rows = 3; out.cols = 2;
// Cells row-major: category0, val0, category1, val1, category2, val2
static const char* raw[] = {"alfa", "10.0", "beta", "25.5", "gamma", "7.2"};
for (auto p : raw) out.cells.push_back(p);
data_table::ViewConfig cfg;
int clicked = -1;
bool ok = viz::render(out, data_table::ViewMode::Bar, cfg,
ImVec2(-1, 300), &clicked);
if (ok && clicked >= 0) {
// clicked = indice de row clicado por el usuario
}
```
## Cuando usarla
Cuando tienes un `StageOutput` (salida de `compute_stage` o `compute_pipeline`) y quieres renderizarlo visualmente en un panel ImGui. El dispatcher elige las columnas correctas automaticamente; usa `ViewConfig` para forzar columnas concretas o cambiar colores/leyenda/zoom.
## Gotchas
- Requiere contexto ImGui y contexto ImPlot vivos en el hilo actual. Llamar solo desde dentro del loop de render de `fn::run_app` (entre `ImGui::NewFrame()` y `ImGui::Render()`).
- El hit-test (`clicked_row_out`) solo esta activo en los modos: Bar, Column, Pie, Donut, Funnel, Scatter, Bubble, Heatmap. En el resto de modos `*clicked_row_out` queda en -1.
- Thread-safety: render DEBE llamarse desde el mismo hilo que `ImGui::NewFrame()`. No hay mutex interno.
- Para ViewMode::Table la funcion devuelve `false` inmediatamente (la tabla se renderiza con `table_view_cpp_viz`, no con este dispatcher).
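- `cfg.fit_request` es `mutable`: la funcion lo consume (lo pone a false) al hacer el fit, aunque reciba `cfg` como `const ViewConfig&`. El caller solo ve ese cambio si pasa su propio objeto persistente; si pasa una copia o un temporal, el `fit_request` del objeto original sigue en true y el fit se repetira en el siguiente frame.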
- Modo Candlestick asume que las 4 primeras columnas numericas son O/H/L/C en ese orden. Modo Radar usa solo la primera fila como poligono.
## Tests parciales
`render()` requiere un contexto ImPlot vivo, por lo que no se puede ejercitar sin ventana. Los tests de este archivo cubren solo las funciones helper puras (`first_numeric_col`, `first_category_col`, `extract_numeric`, `extract_category`), que no dependen de ImGui/ImPlot. El smoke test real del dispatcher se ejecuta via `primitives_gallery --capture` (golden images, issue 0048).