Files
primitives_gallery/playground/tables/tql_to_sql.cpp
T
egutierrez 100aeaa1fc chore: auto-commit (12 archivos)
- playground/tables/CMakeLists.txt
- playground/tables/data_table.cpp
- playground/tables/data_table_logic.cpp
- playground/tables/data_table_logic.h
- playground/tables/self_test.cpp
- playground/tables/tql.cpp
- playground/tables/viz.cpp
- playground/tables/viz.h
- playground/tables/llm_anthropic.cpp
- playground/tables/llm_anthropic.h
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 00:50:35 +02:00

863 lines
33 KiB
C++

// tql_to_sql.cpp — pure walker TQL -> SQL DuckDB + Lua subset transpiler.
// Ver issue 0080. Sin DuckDB linkado.
#include "tql_to_sql.h"
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <set>
#include <sstream>
#include <unordered_map>
namespace tql_to_sql {
using namespace data_table;
// ============================================================================
// Lua subset tokenizer + recursive-descent expression parser -> SQL string.
// ============================================================================
namespace {
struct Tok {
enum Kind {
EndT, NumT, StrT, IdentT, ColT,
// operators / keywords
Plus, Minus, Star, Slash, Percent, ConcatT,
Eq, Neq, Lt, Lte, Gt, Gte,
AndT, OrT, NotT,
IfT, ThenT, ElseT, EndKW,
LParen, RParen, Comma, Dot,
TrueT, FalseT, NilT,
} kind = EndT;
std::string text; // raw token texto (para idents/numbers/strings)
};
// Categorias prohibidas: token literal -> mensaje.
const std::unordered_map<std::string, const char*>& forbidden_keywords() {
static const std::unordered_map<std::string, const char*> M = {
{"function", "closures not allowed in SQL transpile subset"},
{"local", "local declarations not allowed"},
{"for", "loops not allowed"},
{"while", "loops not allowed"},
{"repeat", "loops not allowed"},
{"do", "block statements not allowed"},
{"return", "explicit return not allowed (formula is implicit expression)"},
{"goto", "goto not allowed"},
{"break", "break not allowed (no loops)"},
// io/os/debug/coroutines
{"io", "io.* access not allowed"},
{"os", "os.* access not allowed"},
{"debug", "debug.* access not allowed"},
{"package", "package access not allowed"},
{"require", "require not allowed"},
{"coroutine","coroutines not allowed"},
{"setmetatable","metatables not allowed"},
{"getmetatable","metatables not allowed"},
{"rawget", "rawget not allowed"},
{"rawset", "rawset not allowed"},
{"pcall", "pcall not allowed"},
{"xpcall", "xpcall not allowed"},
{"print", "print not allowed (SQL has no side effects)"},
};
return M;
}
// Whitelist de funciones SQL-transpilables: lua name -> SQL function template.
// Template usa $1, $2, ... como placeholders de argumentos.
struct FnMap { int min_args; int max_args; const char* sql_tmpl; };
const std::unordered_map<std::string, FnMap>& fn_whitelist() {
static const std::unordered_map<std::string, FnMap> M = {
// math.*
{"math.floor", {1, 1, "floor($1)"}},
{"math.ceil", {1, 1, "ceiling($1)"}},
{"math.abs", {1, 1, "abs($1)"}},
{"math.sqrt", {1, 1, "sqrt($1)"}},
{"math.sin", {1, 1, "sin($1)"}},
{"math.cos", {1, 1, "cos($1)"}},
{"math.log", {1, 1, "ln($1)"}},
{"math.exp", {1, 1, "exp($1)"}},
{"math.min", {2, 2, "least($1, $2)"}},
{"math.max", {2, 2, "greatest($1, $2)"}},
// string.*
{"string.upper", {1, 1, "upper($1)"}},
{"string.lower", {1, 1, "lower($1)"}},
{"string.len", {1, 1, "length($1)"}},
{"string.sub", {2, 3, "/*SUBSTRING*/"}}, // manejo especial: argc 2 vs 3
// top-level
{"tostring", {1, 1, "CAST($1 AS VARCHAR)"}},
{"tonumber", {1, 1, "CAST($1 AS DOUBLE)"}},
};
return M;
}
// Identifier SQL-safe: si tiene caracteres especiales o coincide con keyword,
// usar `"col"`. Aqui simplificado: siempre quote con dobles comillas para
// preservar case y permitir `:` (sufijo granularity).
std::string sql_ident(const std::string& name) {
std::string out;
out.reserve(name.size() + 4);
out += '"';
for (char c : name) {
if (c == '"') out += "\"\""; // escape
else out += c;
}
out += '"';
return out;
}
std::string sql_string_literal(const std::string& s) {
std::string out;
out.reserve(s.size() + 4);
out += '\'';
for (char c : s) {
if (c == '\'') out += "''";
else out += c;
}
out += '\'';
return out;
}
class Lexer {
public:
Lexer(const std::string& src) : src_(src) {}
// Devuelve true si parsea OK. False con err en error_.
bool tokenize(std::vector<Tok>& out) {
size_t i = 0;
while (i < src_.size()) {
char c = src_[i];
if (std::isspace((unsigned char)c)) { ++i; continue; }
// Lua line comment
if (c == '-' && i + 1 < src_.size() && src_[i+1] == '-') {
while (i < src_.size() && src_[i] != '\n') ++i;
continue;
}
if (c == '[' ) {
// col ref [identifier]
size_t j = i + 1;
std::string name;
while (j < src_.size() && src_[j] != ']') {
name += src_[j];
++j;
}
if (j >= src_.size()) { error_ = "unterminated [col] ref"; return false; }
Tok t; t.kind = Tok::ColT; t.text = name;
out.push_back(t);
i = j + 1;
continue;
}
if (c == '"' || c == '\'') {
char q = c;
++i;
std::string s;
while (i < src_.size() && src_[i] != q) {
if (src_[i] == '\\' && i + 1 < src_.size()) {
char esc = src_[i+1];
if (esc == 'n') s += '\n';
else if (esc == 't') s += '\t';
else if (esc == '\\') s += '\\';
else if (esc == '\'') s += '\'';
else if (esc == '"') s += '"';
else s += esc;
i += 2;
} else {
s += src_[i++];
}
}
if (i >= src_.size()) { error_ = "unterminated string literal"; return false; }
++i;
Tok t; t.kind = Tok::StrT; t.text = s;
out.push_back(t);
continue;
}
if (std::isdigit((unsigned char)c) || (c == '.' && i + 1 < src_.size() && std::isdigit((unsigned char)src_[i+1]))) {
std::string n;
bool seen_dot = false;
while (i < src_.size()) {
char d = src_[i];
if (std::isdigit((unsigned char)d)) { n += d; ++i; }
else if (d == '.' && !seen_dot) { n += d; seen_dot = true; ++i; }
else break;
}
Tok t; t.kind = Tok::NumT; t.text = n;
out.push_back(t);
continue;
}
if (std::isalpha((unsigned char)c) || c == '_') {
std::string id;
while (i < src_.size() &&
(std::isalnum((unsigned char)src_[i]) || src_[i] == '_')) {
id += src_[i++];
}
// Check forbidden keywords y mapeo a tokens.
auto& F = forbidden_keywords();
auto fit = F.find(id);
if (fit != F.end()) {
error_ = std::string("token '") + id + "': " + fit->second;
return false;
}
Tok t;
if (id == "and") t.kind = Tok::AndT;
else if (id == "or") t.kind = Tok::OrT;
else if (id == "not") t.kind = Tok::NotT;
else if (id == "if") t.kind = Tok::IfT;
else if (id == "then") t.kind = Tok::ThenT;
else if (id == "else") t.kind = Tok::ElseT;
else if (id == "end") t.kind = Tok::EndKW;
else if (id == "true") t.kind = Tok::TrueT;
else if (id == "false") t.kind = Tok::FalseT;
else if (id == "nil") t.kind = Tok::NilT;
else { t.kind = Tok::IdentT; t.text = id; }
out.push_back(t);
continue;
}
// Operators
auto emit = [&](Tok::Kind k, int len) {
Tok t; t.kind = k; out.push_back(t); i += (size_t)len;
};
if (c == '+') { emit(Tok::Plus, 1); continue; }
if (c == '-') { emit(Tok::Minus, 1); continue; }
if (c == '*') { emit(Tok::Star, 1); continue; }
if (c == '/') { emit(Tok::Slash, 1); continue; }
if (c == '%') { emit(Tok::Percent,1); continue; }
if (c == '(') { emit(Tok::LParen, 1); continue; }
if (c == ')') { emit(Tok::RParen, 1); continue; }
if (c == ',') { emit(Tok::Comma, 1); continue; }
if (c == '.') {
if (i + 1 < src_.size() && src_[i+1] == '.') {
if (i + 2 < src_.size() && src_[i+2] == '.') {
error_ = "'...' vararg not allowed"; return false;
}
emit(Tok::ConcatT, 2); continue;
}
emit(Tok::Dot, 1); continue;
}
if (c == '=') {
if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Eq, 2); continue; }
error_ = "single '=' (assignment) not allowed"; return false;
}
if (c == '~') {
if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Neq, 2); continue; }
error_ = "stray '~'"; return false;
}
if (c == '<') {
if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Lte, 2); continue; }
emit(Tok::Lt, 1); continue;
}
if (c == '>') {
if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Gte, 2); continue; }
emit(Tok::Gt, 1); continue;
}
if (c == '{') { error_ = "table literals '{...}' not allowed"; return false; }
if (c == '}') { error_ = "stray '}'"; return false; }
if (c == ';') { error_ = "multi-statement not allowed"; return false; }
if (c == '#') { error_ = "length '#' operator not allowed"; return false; }
if (c == ':') { error_ = "method calls ':' not allowed"; return false; }
error_ = std::string("unexpected character '") + c + "'";
return false;
}
Tok t; t.kind = Tok::EndT;
out.push_back(t);
return true;
}
const std::string& error() const { return error_; }
private:
const std::string& src_;
std::string error_;
};
class Parser {
public:
Parser(const std::vector<Tok>& toks,
const std::vector<std::string>& headers)
: toks_(toks), headers_(headers) {}
// expr := ternary
// ternary := if/then/else | logic_or
bool parse_expr(std::string& out) {
return parse_ternary(out);
}
bool parse_ternary(std::string& out) {
if (peek(0).kind == Tok::IfT) {
++pos_;
std::string a, b, c;
if (!parse_logic_or(a)) return false;
if (!eat(Tok::ThenT, "'then' expected after 'if'")) return false;
if (!parse_ternary(b)) return false;
if (!eat(Tok::ElseT, "'else' expected (subset requires else branch)")) return false;
if (!parse_ternary(c)) return false;
if (!eat(Tok::EndKW, "'end' expected to close 'if'")) return false;
out = "CASE WHEN " + a + " THEN " + b + " ELSE " + c + " END";
return true;
}
return parse_logic_or(out);
}
bool parse_logic_or(std::string& out) {
if (!parse_logic_and(out)) return false;
while (peek(0).kind == Tok::OrT) {
++pos_;
std::string rhs;
if (!parse_logic_and(rhs)) return false;
out = "(" + out + " OR " + rhs + ")";
}
return true;
}
bool parse_logic_and(std::string& out) {
if (!parse_not(out)) return false;
while (peek(0).kind == Tok::AndT) {
++pos_;
std::string rhs;
if (!parse_not(rhs)) return false;
out = "(" + out + " AND " + rhs + ")";
}
return true;
}
bool parse_not(std::string& out) {
if (peek(0).kind == Tok::NotT) {
++pos_;
std::string e;
if (!parse_not(e)) return false;
out = "NOT (" + e + ")";
return true;
}
return parse_comparison(out);
}
bool parse_comparison(std::string& out) {
if (!parse_concat(out)) return false;
while (true) {
Tok::Kind k = peek(0).kind;
const char* op = nullptr;
if (k == Tok::Eq) op = " = ";
else if (k == Tok::Neq) op = " <> ";
else if (k == Tok::Lt) op = " < ";
else if (k == Tok::Lte) op = " <= ";
else if (k == Tok::Gt) op = " > ";
else if (k == Tok::Gte) op = " >= ";
else break;
++pos_;
std::string rhs;
if (!parse_concat(rhs)) return false;
out = "(" + out + op + rhs + ")";
}
return true;
}
bool parse_concat(std::string& out) {
if (!parse_additive(out)) return false;
while (peek(0).kind == Tok::ConcatT) {
++pos_;
std::string rhs;
if (!parse_additive(rhs)) return false;
out = "(" + out + " || " + rhs + ")";
}
return true;
}
bool parse_additive(std::string& out) {
if (!parse_multiplicative(out)) return false;
while (peek(0).kind == Tok::Plus || peek(0).kind == Tok::Minus) {
const char* op = (peek(0).kind == Tok::Plus) ? " + " : " - ";
++pos_;
std::string rhs;
if (!parse_multiplicative(rhs)) return false;
out = "(" + out + op + rhs + ")";
}
return true;
}
bool parse_multiplicative(std::string& out) {
if (!parse_unary(out)) return false;
while (peek(0).kind == Tok::Star || peek(0).kind == Tok::Slash || peek(0).kind == Tok::Percent) {
const char* op = (peek(0).kind == Tok::Star) ? " * "
: (peek(0).kind == Tok::Slash) ? " / " : " % ";
++pos_;
std::string rhs;
if (!parse_unary(rhs)) return false;
out = "(" + out + op + rhs + ")";
}
return true;
}
bool parse_unary(std::string& out) {
if (peek(0).kind == Tok::Minus) {
++pos_;
std::string e;
if (!parse_unary(e)) return false;
out = "(-" + e + ")";
return true;
}
return parse_primary(out);
}
bool parse_primary(std::string& out) {
Tok t = peek(0);
if (t.kind == Tok::NumT) {
++pos_;
out = t.text;
return true;
}
if (t.kind == Tok::StrT) {
++pos_;
out = sql_string_literal(t.text);
return true;
}
if (t.kind == Tok::TrueT) { ++pos_; out = "TRUE"; return true; }
if (t.kind == Tok::FalseT) { ++pos_; out = "FALSE"; return true; }
if (t.kind == Tok::NilT) { ++pos_; out = "NULL"; return true; }
if (t.kind == Tok::ColT) {
// Check col exists (warning, not error).
++pos_;
(void)headers_; // currently not validating — caller can do that
out = sql_ident(t.text);
return true;
}
if (t.kind == Tok::LParen) {
++pos_;
std::string e;
if (!parse_expr(e)) return false;
if (!eat(Tok::RParen, "expected ')'")) return false;
out = "(" + e + ")";
return true;
}
if (t.kind == Tok::IdentT) {
// Function call: identifier ("." identifier)? "(" args ")"
std::string name = t.text;
++pos_;
if (peek(0).kind == Tok::Dot) {
++pos_;
if (peek(0).kind != Tok::IdentT) {
error_ = "expected identifier after '.'";
return false;
}
name += "." + peek(0).text;
++pos_;
}
if (peek(0).kind != Tok::LParen) {
error_ = "bare identifier '" + name +
"' not allowed (only [col] refs + whitelisted fn calls)";
return false;
}
++pos_; // consume '('
std::vector<std::string> args;
if (peek(0).kind != Tok::RParen) {
while (true) {
std::string a;
if (!parse_expr(a)) return false;
args.push_back(a);
if (peek(0).kind == Tok::Comma) { ++pos_; continue; }
break;
}
}
if (!eat(Tok::RParen, "expected ')' closing function args")) return false;
// Validate against whitelist
auto& W = fn_whitelist();
auto wit = W.find(name);
if (wit == W.end()) {
error_ = "function '" + name +
"' not in SQL transpile whitelist (math.*, string.upper/lower/len/sub, tostring, tonumber)";
return false;
}
const FnMap& fm = wit->second;
if ((int)args.size() < fm.min_args || (int)args.size() > fm.max_args) {
std::ostringstream os;
os << "function '" << name << "' takes " << fm.min_args;
if (fm.max_args != fm.min_args) os << ".." << fm.max_args;
os << " args, got " << args.size();
error_ = os.str();
return false;
}
// Casos especiales
if (name == "string.sub") {
// Lua: string.sub(s, i [, j]) — i/j 1-based, inclusive.
// SQL DuckDB: substring(s, i, count). count = j - i + 1.
if (args.size() == 2) {
// sin j -> hasta el final. DuckDB substring(s, i) acepta.
out = "substring(" + args[0] + ", " + args[1] + ")";
} else {
out = "substring(" + args[0] + ", " + args[1] +
", (" + args[2] + ") - (" + args[1] + ") + 1)";
}
return true;
}
// Generico: substituir $1..$N en template.
std::string s = fm.sql_tmpl;
for (int i = 0; i < (int)args.size(); ++i) {
char ph[6];
std::snprintf(ph, sizeof(ph), "$%d", i + 1);
std::string p = ph;
size_t at = 0;
while ((at = s.find(p, at)) != std::string::npos) {
s.replace(at, p.size(), args[i]);
at += args[i].size();
}
}
out = s;
return true;
}
error_ = std::string("unexpected token in expression");
return false;
}
bool eat(Tok::Kind k, const char* msg) {
if (peek(0).kind != k) { error_ = msg; return false; }
++pos_;
return true;
}
const Tok& peek(int off) const {
size_t i = pos_ + (size_t)off;
if (i >= toks_.size()) return toks_.back();
return toks_[i];
}
bool at_end() const { return peek(0).kind == Tok::EndT; }
const std::string& error() const { return error_; }
private:
const std::vector<Tok>& toks_;
const std::vector<std::string>& headers_;
size_t pos_ = 0;
std::string error_;
};
} // anon
std::string transpile_expr(const std::string& formula,
const std::vector<std::string>& in_headers,
std::string& error_out) {
error_out.clear();
std::vector<Tok> toks;
Lexer lex(formula);
if (!lex.tokenize(toks)) {
error_out = lex.error();
return "";
}
Parser p(toks, in_headers);
std::string out;
if (!p.parse_expr(out)) {
error_out = p.error();
return "";
}
if (!p.at_end()) {
error_out = "unexpected trailing tokens after expression";
return "";
}
return out;
}
bool is_transpilable(const std::string& formula, std::string& error_out) {
std::vector<std::string> empty;
std::string s = transpile_expr(formula, empty, error_out);
return error_out.empty() && !s.empty();
}
// ============================================================================
// TQL State -> SQL DuckDB emitter.
// ============================================================================
namespace {
// Mapeo aggregation -> SQL DuckDB expression.
std::string emit_agg_expr(const Aggregation& a) {
switch (a.fn) {
case AggFn::Count: return "COUNT(*)";
case AggFn::Sum: return "SUM(" + sql_ident(a.col) + ")";
case AggFn::Avg: return "AVG(" + sql_ident(a.col) + ")";
case AggFn::Min: return "MIN(" + sql_ident(a.col) + ")";
case AggFn::Max: return "MAX(" + sql_ident(a.col) + ")";
case AggFn::Distinct: return "COUNT(DISTINCT " + sql_ident(a.col) + ")";
case AggFn::Stddev: return "STDDEV(" + sql_ident(a.col) + ")";
case AggFn::Median: return "quantile_cont(" + sql_ident(a.col) + ", 0.5)";
case AggFn::P25: return "quantile_cont(" + sql_ident(a.col) + ", 0.25)";
case AggFn::P75: return "quantile_cont(" + sql_ident(a.col) + ", 0.75)";
case AggFn::P90: return "quantile_cont(" + sql_ident(a.col) + ", 0.90)";
case AggFn::P99: return "quantile_cont(" + sql_ident(a.col) + ", 0.99)";
case AggFn::Percentile: {
char buf[32];
std::snprintf(buf, sizeof(buf), "%g", a.arg);
return std::string("quantile_cont(") + sql_ident(a.col) + ", " + buf + ")";
}
}
return "/* unknown agg */ NULL";
}
std::string emit_breakout_expr(const std::string& bk) {
std::string col_clean;
DateGranularity g = parse_breakout_granularity(bk, col_clean);
if (g == DateGranularity::None) {
return sql_ident(col_clean);
}
const char* tok = date_granularity_token(g);
// Week: DuckDB date_trunc('week', col) -> monday segun configuracion.
return std::string("date_trunc('") + tok + "', " + sql_ident(col_clean) + ")";
}
// Resuelve un Op a operador SQL + (opcional) override de RHS.
const char* sql_op(Op op) {
switch (op) {
case Op::Eq: return " = ";
case Op::Neq: return " <> ";
case Op::Gt: return " > ";
case Op::Gte: return " >= ";
case Op::Lt: return " < ";
case Op::Lte: return " <= ";
case Op::Contains: return " LIKE ";
case Op::NotContains: return " NOT LIKE ";
case Op::StartsWith: return " LIKE ";
case Op::EndsWith: return " LIKE ";
}
return " = ";
}
// Construye RHS literal/pattern segun op + value. Devuelve placeholder '?'
// y push de params; o pattern string-literal directo para LIKE wildcards.
std::string emit_filter_rhs(const Filter& f, std::vector<std::string>& params) {
if (f.op == Op::Contains || f.op == Op::NotContains) {
std::string v = "%" + f.value + "%";
params.push_back(v);
return "?";
}
if (f.op == Op::StartsWith) {
std::string v = f.value + "%";
params.push_back(v);
return "?";
}
if (f.op == Op::EndsWith) {
std::string v = "%" + f.value;
params.push_back(v);
return "?";
}
params.push_back(f.value);
return "?";
}
// Construye CTE stage 0 (Raw): SELECT cols + derived FROM main_t [JOINs].
// `tables` provee schema. main_t name = tables[main_idx].name. Derived cols
// se transpilan a SQL expression; si fuera de subset, push warning + skip col.
bool emit_stage0(const State& st, const std::vector<TableInput>& tables,
int main_idx, SqlEmit& e) {
if (main_idx < 0 || main_idx >= (int)tables.size()) {
e.error = "main table out of range";
return false;
}
const TableInput& main_t = tables[(size_t)main_idx];
// SELECT list: cols originales + derived expressions (subset).
std::string select_list;
for (size_t i = 0; i < main_t.headers.size(); ++i) {
if (i > 0) select_list += ", ";
select_list += sql_ident(main_t.headers[i]);
}
// Derived cols (stage 0 derived).
if (!st.stages.empty()) {
const Stage& s0 = st.stages[0];
for (const auto& d : s0.derived) {
if (d.source_col >= 0 && d.formula.empty()) {
// Retipo puro: alias col origen.
if (d.source_col < (int)main_t.headers.size()) {
select_list += ", " + sql_ident(main_t.headers[(size_t)d.source_col])
+ " AS " + sql_ident(d.name);
}
continue;
}
std::string err;
std::string expr = transpile_expr(d.formula, main_t.headers, err);
if (!err.empty()) {
std::string msg = "derived col '" + d.name +
"' formula out of SQL subset: " + err;
e.warnings.push_back(msg);
// Skip col en SQL output; agente puede recurrir a TQL puro.
continue;
}
select_list += ", " + expr + " AS " + sql_ident(d.name);
}
}
std::string from = sql_ident(main_t.name);
// Joins
for (const auto& jn : st.joins) {
const TableInput* right = nullptr;
for (const auto& ti : tables) {
if (ti.name == jn.source) { right = &ti; break; }
}
if (!right) {
e.warnings.push_back("join source '" + jn.source + "' not in tables");
continue;
}
const char* strat = "LEFT JOIN";
switch (jn.strategy) {
case JoinStrategy::Left: strat = "LEFT JOIN"; break;
case JoinStrategy::Inner: strat = "INNER JOIN"; break;
case JoinStrategy::Right: strat = "RIGHT JOIN"; break;
case JoinStrategy::Full: strat = "FULL OUTER JOIN"; break;
}
from += "\n " + std::string(strat) + " " + sql_ident(right->name)
+ " AS " + sql_ident(jn.alias) + " ON ";
for (size_t k = 0; k < jn.on.size(); ++k) {
if (k > 0) from += " AND ";
from += sql_ident(main_t.name) + "." + sql_ident(jn.on[k].first)
+ " = " + sql_ident(jn.alias) + "." + sql_ident(jn.on[k].second);
}
// Anadir cols del right al SELECT con alias.col prefix.
if (jn.fields.empty()) {
for (const auto& rh : right->headers) {
std::string aliased = jn.alias + "." + rh;
select_list += ", " + sql_ident(jn.alias) + "." + sql_ident(rh)
+ " AS " + sql_ident(aliased);
}
} else {
for (const auto& fld : jn.fields) {
std::string aliased = jn.alias + "." + fld;
select_list += ", " + sql_ident(jn.alias) + "." + sql_ident(fld)
+ " AS " + sql_ident(aliased);
}
}
}
// Stage 0 WHERE: filters del Raw (filter col idx en eff_headers).
// Filter.col es indice en eff_headers (orig + derived). Para SQL emit,
// necesitamos resolver col idx -> col name. Reconstruir orden eff_headers.
std::vector<std::string> eff_headers = main_t.headers;
if (!st.stages.empty()) {
for (const auto& d : st.stages[0].derived) {
eff_headers.push_back(d.name);
}
}
std::string where_clause;
if (!st.stages.empty()) {
const Stage& s0 = st.stages[0];
for (size_t fi = 0; fi < s0.filters.size(); ++fi) {
const Filter& f = s0.filters[fi];
if (f.col < 0 || f.col >= (int)eff_headers.size()) {
e.warnings.push_back("stage0 filter col idx out of range");
continue;
}
std::string col = sql_ident(eff_headers[(size_t)f.col]);
if (!where_clause.empty()) where_clause += " AND ";
where_clause += col + sql_op(f.op) + emit_filter_rhs(f, e.params);
}
}
// Stage 0 sort
std::string order_clause;
if (!st.stages.empty()) {
const Stage& s0 = st.stages[0];
for (size_t si = 0; si < s0.sorts.size(); ++si) {
const SortClause& sc = s0.sorts[si];
if (!order_clause.empty()) order_clause += ", ";
order_clause += sql_ident(sc.col) + (sc.desc ? " DESC" : " ASC");
}
}
std::string cte = "t0 AS (\n SELECT " + select_list + "\n FROM " + from;
if (!where_clause.empty()) cte += "\n WHERE " + where_clause;
if (!order_clause.empty()) cte += "\n ORDER BY " + order_clause;
cte += "\n)";
e.sql = "WITH " + cte;
return true;
}
// Stage N (N>=1): SELECT breakouts + agg expressions FROM t<N-1>
// [WHERE filters] [GROUP BY ...] [ORDER BY ...].
bool emit_stage_n(const Stage& stg, int n, SqlEmit& e) {
std::string prev = "t" + std::to_string(n - 1);
std::string cur = "t" + std::to_string(n);
// SELECT list: breakouts (con granularity expr si aplica) + aggregations.
std::string select_list;
for (size_t i = 0; i < stg.breakouts.size(); ++i) {
if (i > 0) select_list += ", ";
select_list += emit_breakout_expr(stg.breakouts[i])
+ " AS " + sql_ident(stg.breakouts[i]);
}
for (size_t i = 0; i < stg.aggregations.size(); ++i) {
if (!select_list.empty()) select_list += ", ";
std::string alias = aggregation_alias(stg.aggregations[i]);
select_list += emit_agg_expr(stg.aggregations[i]) + " AS " + sql_ident(alias);
}
if (select_list.empty()) select_list = "*";
// WHERE: filters del stage. col es indice en input headers (output del stage previo).
// Aproximacion: usamos el nombre via stage breakouts/aggs del stage previo si fuera necesario.
// Para v1, emit por nombre cuando filter.col >= 0 sea idx en breakouts/aggs/orig. El
// chequeo de existencia se delega a DuckDB (errores en execute son detectables).
// V1 simple: skip filter cuando no podemos resolver — caller solo deberia tener filter
// sobre cols que existen.
// Estrategia simple: emite WHERE solo si stage previo provee headers conocidos. Para no
// duplicar logica, dejamos al caller proveer headers via filter.col que se resuelve a
// breakouts[col].
// V1: si filter.col esta en rango de breakouts del stage previo, emite breakout name.
// Sino, warning + skip.
std::string where_clause;
// Best effort: no podemos construir headers del stage previo aqui sin recomputar.
// Para v1, omitimos filters de stages >=1 — caller deberia evitar usarlos via SQL.
// TODO v2: pasar prev_headers para resolver.
(void)where_clause;
// GROUP BY: solo si hay breakouts.
std::string group_clause;
for (size_t i = 0; i < stg.breakouts.size(); ++i) {
if (i > 0) group_clause += ", ";
// Re-emit la expression para GROUP BY (no alias).
group_clause += emit_breakout_expr(stg.breakouts[i]);
}
// ORDER BY
std::string order_clause;
for (size_t i = 0; i < stg.sorts.size(); ++i) {
if (i > 0) order_clause += ", ";
order_clause += sql_ident(stg.sorts[i].col) + (stg.sorts[i].desc ? " DESC" : " ASC");
}
std::string cte = ",\n" + cur + " AS (\n SELECT " + select_list
+ "\n FROM " + prev;
if (!group_clause.empty()) cte += "\n GROUP BY " + group_clause;
if (!order_clause.empty()) cte += "\n ORDER BY " + order_clause;
cte += "\n)";
e.sql += cte;
return true;
}
} // anon
SqlEmit emit_sql(const State& state,
const std::vector<TableInput>& tables,
int up_to_stage) {
SqlEmit out;
if (state.stages.empty()) {
out.error = "state has no stages";
return out;
}
if (tables.empty()) {
out.error = "no input tables provided";
return out;
}
int target = (up_to_stage < 0) ? state.active_stage : up_to_stage;
if (target < 0) target = 0;
if (target >= (int)state.stages.size()) target = (int)state.stages.size() - 1;
// Resolve main idx via state.main_source (o tables[0] default).
int main_idx = resolve_main_idx(tables, state.main_source);
if (main_idx < 0) main_idx = 0;
if (!emit_stage0(state, tables, main_idx, out)) return out;
for (int si = 1; si <= target; ++si) {
if (!emit_stage_n(state.stages[(size_t)si], si, out)) return out;
}
out.sql += "\nSELECT * FROM t" + std::to_string(target) + ";\n";
return out;
}
} // namespace tql_to_sql