// tql_to_sql.cpp — pure walker TQL -> SQL DuckDB + Lua subset transpiler. // Ver issue 0080. Sin DuckDB linkado. #include "tql_to_sql.h" #include #include #include #include #include #include #include namespace tql_to_sql { using namespace data_table; // ============================================================================ // Lua subset tokenizer + recursive-descent expression parser -> SQL string. // ============================================================================ namespace { struct Tok { enum Kind { EndT, NumT, StrT, IdentT, ColT, // operators / keywords Plus, Minus, Star, Slash, Percent, ConcatT, Eq, Neq, Lt, Lte, Gt, Gte, AndT, OrT, NotT, IfT, ThenT, ElseT, EndKW, LParen, RParen, Comma, Dot, TrueT, FalseT, NilT, } kind = EndT; std::string text; // raw token texto (para idents/numbers/strings) }; // Categorias prohibidas: token literal -> mensaje. const std::unordered_map& forbidden_keywords() { static const std::unordered_map M = { {"function", "closures not allowed in SQL transpile subset"}, {"local", "local declarations not allowed"}, {"for", "loops not allowed"}, {"while", "loops not allowed"}, {"repeat", "loops not allowed"}, {"do", "block statements not allowed"}, {"return", "explicit return not allowed (formula is implicit expression)"}, {"goto", "goto not allowed"}, {"break", "break not allowed (no loops)"}, // io/os/debug/coroutines {"io", "io.* access not allowed"}, {"os", "os.* access not allowed"}, {"debug", "debug.* access not allowed"}, {"package", "package access not allowed"}, {"require", "require not allowed"}, {"coroutine","coroutines not allowed"}, {"setmetatable","metatables not allowed"}, {"getmetatable","metatables not allowed"}, {"rawget", "rawget not allowed"}, {"rawset", "rawset not allowed"}, {"pcall", "pcall not allowed"}, {"xpcall", "xpcall not allowed"}, {"print", "print not allowed (SQL has no side effects)"}, }; return M; } // Whitelist de funciones SQL-transpilables: lua name -> SQL function template. // Template usa $1, $2, ... como placeholders de argumentos. struct FnMap { int min_args; int max_args; const char* sql_tmpl; }; const std::unordered_map& fn_whitelist() { static const std::unordered_map M = { // math.* {"math.floor", {1, 1, "floor($1)"}}, {"math.ceil", {1, 1, "ceiling($1)"}}, {"math.abs", {1, 1, "abs($1)"}}, {"math.sqrt", {1, 1, "sqrt($1)"}}, {"math.sin", {1, 1, "sin($1)"}}, {"math.cos", {1, 1, "cos($1)"}}, {"math.log", {1, 1, "ln($1)"}}, {"math.exp", {1, 1, "exp($1)"}}, {"math.min", {2, 2, "least($1, $2)"}}, {"math.max", {2, 2, "greatest($1, $2)"}}, // string.* {"string.upper", {1, 1, "upper($1)"}}, {"string.lower", {1, 1, "lower($1)"}}, {"string.len", {1, 1, "length($1)"}}, {"string.sub", {2, 3, "/*SUBSTRING*/"}}, // manejo especial: argc 2 vs 3 // top-level {"tostring", {1, 1, "CAST($1 AS VARCHAR)"}}, {"tonumber", {1, 1, "CAST($1 AS DOUBLE)"}}, }; return M; } // Identifier SQL-safe: si tiene caracteres especiales o coincide con keyword, // usar `"col"`. Aqui simplificado: siempre quote con dobles comillas para // preservar case y permitir `:` (sufijo granularity). std::string sql_ident(const std::string& name) { std::string out; out.reserve(name.size() + 4); out += '"'; for (char c : name) { if (c == '"') out += "\"\""; // escape else out += c; } out += '"'; return out; } std::string sql_string_literal(const std::string& s) { std::string out; out.reserve(s.size() + 4); out += '\''; for (char c : s) { if (c == '\'') out += "''"; else out += c; } out += '\''; return out; } class Lexer { public: Lexer(const std::string& src) : src_(src) {} // Devuelve true si parsea OK. False con err en error_. bool tokenize(std::vector& out) { size_t i = 0; while (i < src_.size()) { char c = src_[i]; if (std::isspace((unsigned char)c)) { ++i; continue; } // Lua line comment if (c == '-' && i + 1 < src_.size() && src_[i+1] == '-') { while (i < src_.size() && src_[i] != '\n') ++i; continue; } if (c == '[' ) { // col ref [identifier] size_t j = i + 1; std::string name; while (j < src_.size() && src_[j] != ']') { name += src_[j]; ++j; } if (j >= src_.size()) { error_ = "unterminated [col] ref"; return false; } Tok t; t.kind = Tok::ColT; t.text = name; out.push_back(t); i = j + 1; continue; } if (c == '"' || c == '\'') { char q = c; ++i; std::string s; while (i < src_.size() && src_[i] != q) { if (src_[i] == '\\' && i + 1 < src_.size()) { char esc = src_[i+1]; if (esc == 'n') s += '\n'; else if (esc == 't') s += '\t'; else if (esc == '\\') s += '\\'; else if (esc == '\'') s += '\''; else if (esc == '"') s += '"'; else s += esc; i += 2; } else { s += src_[i++]; } } if (i >= src_.size()) { error_ = "unterminated string literal"; return false; } ++i; Tok t; t.kind = Tok::StrT; t.text = s; out.push_back(t); continue; } if (std::isdigit((unsigned char)c) || (c == '.' && i + 1 < src_.size() && std::isdigit((unsigned char)src_[i+1]))) { std::string n; bool seen_dot = false; while (i < src_.size()) { char d = src_[i]; if (std::isdigit((unsigned char)d)) { n += d; ++i; } else if (d == '.' && !seen_dot) { n += d; seen_dot = true; ++i; } else break; } Tok t; t.kind = Tok::NumT; t.text = n; out.push_back(t); continue; } if (std::isalpha((unsigned char)c) || c == '_') { std::string id; while (i < src_.size() && (std::isalnum((unsigned char)src_[i]) || src_[i] == '_')) { id += src_[i++]; } // Check forbidden keywords y mapeo a tokens. auto& F = forbidden_keywords(); auto fit = F.find(id); if (fit != F.end()) { error_ = std::string("token '") + id + "': " + fit->second; return false; } Tok t; if (id == "and") t.kind = Tok::AndT; else if (id == "or") t.kind = Tok::OrT; else if (id == "not") t.kind = Tok::NotT; else if (id == "if") t.kind = Tok::IfT; else if (id == "then") t.kind = Tok::ThenT; else if (id == "else") t.kind = Tok::ElseT; else if (id == "end") t.kind = Tok::EndKW; else if (id == "true") t.kind = Tok::TrueT; else if (id == "false") t.kind = Tok::FalseT; else if (id == "nil") t.kind = Tok::NilT; else { t.kind = Tok::IdentT; t.text = id; } out.push_back(t); continue; } // Operators auto emit = [&](Tok::Kind k, int len) { Tok t; t.kind = k; out.push_back(t); i += (size_t)len; }; if (c == '+') { emit(Tok::Plus, 1); continue; } if (c == '-') { emit(Tok::Minus, 1); continue; } if (c == '*') { emit(Tok::Star, 1); continue; } if (c == '/') { emit(Tok::Slash, 1); continue; } if (c == '%') { emit(Tok::Percent,1); continue; } if (c == '(') { emit(Tok::LParen, 1); continue; } if (c == ')') { emit(Tok::RParen, 1); continue; } if (c == ',') { emit(Tok::Comma, 1); continue; } if (c == '.') { if (i + 1 < src_.size() && src_[i+1] == '.') { if (i + 2 < src_.size() && src_[i+2] == '.') { error_ = "'...' vararg not allowed"; return false; } emit(Tok::ConcatT, 2); continue; } emit(Tok::Dot, 1); continue; } if (c == '=') { if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Eq, 2); continue; } error_ = "single '=' (assignment) not allowed"; return false; } if (c == '~') { if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Neq, 2); continue; } error_ = "stray '~'"; return false; } if (c == '<') { if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Lte, 2); continue; } emit(Tok::Lt, 1); continue; } if (c == '>') { if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Gte, 2); continue; } emit(Tok::Gt, 1); continue; } if (c == '{') { error_ = "table literals '{...}' not allowed"; return false; } if (c == '}') { error_ = "stray '}'"; return false; } if (c == ';') { error_ = "multi-statement not allowed"; return false; } if (c == '#') { error_ = "length '#' operator not allowed"; return false; } if (c == ':') { error_ = "method calls ':' not allowed"; return false; } error_ = std::string("unexpected character '") + c + "'"; return false; } Tok t; t.kind = Tok::EndT; out.push_back(t); return true; } const std::string& error() const { return error_; } private: const std::string& src_; std::string error_; }; class Parser { public: Parser(const std::vector& toks, const std::vector& headers) : toks_(toks), headers_(headers) {} // expr := ternary // ternary := if/then/else | logic_or bool parse_expr(std::string& out) { return parse_ternary(out); } bool parse_ternary(std::string& out) { if (peek(0).kind == Tok::IfT) { ++pos_; std::string a, b, c; if (!parse_logic_or(a)) return false; if (!eat(Tok::ThenT, "'then' expected after 'if'")) return false; if (!parse_ternary(b)) return false; if (!eat(Tok::ElseT, "'else' expected (subset requires else branch)")) return false; if (!parse_ternary(c)) return false; if (!eat(Tok::EndKW, "'end' expected to close 'if'")) return false; out = "CASE WHEN " + a + " THEN " + b + " ELSE " + c + " END"; return true; } return parse_logic_or(out); } bool parse_logic_or(std::string& out) { if (!parse_logic_and(out)) return false; while (peek(0).kind == Tok::OrT) { ++pos_; std::string rhs; if (!parse_logic_and(rhs)) return false; out = "(" + out + " OR " + rhs + ")"; } return true; } bool parse_logic_and(std::string& out) { if (!parse_not(out)) return false; while (peek(0).kind == Tok::AndT) { ++pos_; std::string rhs; if (!parse_not(rhs)) return false; out = "(" + out + " AND " + rhs + ")"; } return true; } bool parse_not(std::string& out) { if (peek(0).kind == Tok::NotT) { ++pos_; std::string e; if (!parse_not(e)) return false; out = "NOT (" + e + ")"; return true; } return parse_comparison(out); } bool parse_comparison(std::string& out) { if (!parse_concat(out)) return false; while (true) { Tok::Kind k = peek(0).kind; const char* op = nullptr; if (k == Tok::Eq) op = " = "; else if (k == Tok::Neq) op = " <> "; else if (k == Tok::Lt) op = " < "; else if (k == Tok::Lte) op = " <= "; else if (k == Tok::Gt) op = " > "; else if (k == Tok::Gte) op = " >= "; else break; ++pos_; std::string rhs; if (!parse_concat(rhs)) return false; out = "(" + out + op + rhs + ")"; } return true; } bool parse_concat(std::string& out) { if (!parse_additive(out)) return false; while (peek(0).kind == Tok::ConcatT) { ++pos_; std::string rhs; if (!parse_additive(rhs)) return false; out = "(" + out + " || " + rhs + ")"; } return true; } bool parse_additive(std::string& out) { if (!parse_multiplicative(out)) return false; while (peek(0).kind == Tok::Plus || peek(0).kind == Tok::Minus) { const char* op = (peek(0).kind == Tok::Plus) ? " + " : " - "; ++pos_; std::string rhs; if (!parse_multiplicative(rhs)) return false; out = "(" + out + op + rhs + ")"; } return true; } bool parse_multiplicative(std::string& out) { if (!parse_unary(out)) return false; while (peek(0).kind == Tok::Star || peek(0).kind == Tok::Slash || peek(0).kind == Tok::Percent) { const char* op = (peek(0).kind == Tok::Star) ? " * " : (peek(0).kind == Tok::Slash) ? " / " : " % "; ++pos_; std::string rhs; if (!parse_unary(rhs)) return false; out = "(" + out + op + rhs + ")"; } return true; } bool parse_unary(std::string& out) { if (peek(0).kind == Tok::Minus) { ++pos_; std::string e; if (!parse_unary(e)) return false; out = "(-" + e + ")"; return true; } return parse_primary(out); } bool parse_primary(std::string& out) { Tok t = peek(0); if (t.kind == Tok::NumT) { ++pos_; out = t.text; return true; } if (t.kind == Tok::StrT) { ++pos_; out = sql_string_literal(t.text); return true; } if (t.kind == Tok::TrueT) { ++pos_; out = "TRUE"; return true; } if (t.kind == Tok::FalseT) { ++pos_; out = "FALSE"; return true; } if (t.kind == Tok::NilT) { ++pos_; out = "NULL"; return true; } if (t.kind == Tok::ColT) { // Check col exists (warning, not error). ++pos_; (void)headers_; // currently not validating — caller can do that out = sql_ident(t.text); return true; } if (t.kind == Tok::LParen) { ++pos_; std::string e; if (!parse_expr(e)) return false; if (!eat(Tok::RParen, "expected ')'")) return false; out = "(" + e + ")"; return true; } if (t.kind == Tok::IdentT) { // Function call: identifier ("." identifier)? "(" args ")" std::string name = t.text; ++pos_; if (peek(0).kind == Tok::Dot) { ++pos_; if (peek(0).kind != Tok::IdentT) { error_ = "expected identifier after '.'"; return false; } name += "." + peek(0).text; ++pos_; } if (peek(0).kind != Tok::LParen) { error_ = "bare identifier '" + name + "' not allowed (only [col] refs + whitelisted fn calls)"; return false; } ++pos_; // consume '(' std::vector args; if (peek(0).kind != Tok::RParen) { while (true) { std::string a; if (!parse_expr(a)) return false; args.push_back(a); if (peek(0).kind == Tok::Comma) { ++pos_; continue; } break; } } if (!eat(Tok::RParen, "expected ')' closing function args")) return false; // Validate against whitelist auto& W = fn_whitelist(); auto wit = W.find(name); if (wit == W.end()) { error_ = "function '" + name + "' not in SQL transpile whitelist (math.*, string.upper/lower/len/sub, tostring, tonumber)"; return false; } const FnMap& fm = wit->second; if ((int)args.size() < fm.min_args || (int)args.size() > fm.max_args) { std::ostringstream os; os << "function '" << name << "' takes " << fm.min_args; if (fm.max_args != fm.min_args) os << ".." << fm.max_args; os << " args, got " << args.size(); error_ = os.str(); return false; } // Casos especiales if (name == "string.sub") { // Lua: string.sub(s, i [, j]) — i/j 1-based, inclusive. // SQL DuckDB: substring(s, i, count). count = j - i + 1. if (args.size() == 2) { // sin j -> hasta el final. DuckDB substring(s, i) acepta. out = "substring(" + args[0] + ", " + args[1] + ")"; } else { out = "substring(" + args[0] + ", " + args[1] + ", (" + args[2] + ") - (" + args[1] + ") + 1)"; } return true; } // Generico: substituir $1..$N en template. std::string s = fm.sql_tmpl; for (int i = 0; i < (int)args.size(); ++i) { char ph[6]; std::snprintf(ph, sizeof(ph), "$%d", i + 1); std::string p = ph; size_t at = 0; while ((at = s.find(p, at)) != std::string::npos) { s.replace(at, p.size(), args[i]); at += args[i].size(); } } out = s; return true; } error_ = std::string("unexpected token in expression"); return false; } bool eat(Tok::Kind k, const char* msg) { if (peek(0).kind != k) { error_ = msg; return false; } ++pos_; return true; } const Tok& peek(int off) const { size_t i = pos_ + (size_t)off; if (i >= toks_.size()) return toks_.back(); return toks_[i]; } bool at_end() const { return peek(0).kind == Tok::EndT; } const std::string& error() const { return error_; } private: const std::vector& toks_; const std::vector& headers_; size_t pos_ = 0; std::string error_; }; } // anon std::string transpile_expr(const std::string& formula, const std::vector& in_headers, std::string& error_out) { error_out.clear(); std::vector toks; Lexer lex(formula); if (!lex.tokenize(toks)) { error_out = lex.error(); return ""; } Parser p(toks, in_headers); std::string out; if (!p.parse_expr(out)) { error_out = p.error(); return ""; } if (!p.at_end()) { error_out = "unexpected trailing tokens after expression"; return ""; } return out; } bool is_transpilable(const std::string& formula, std::string& error_out) { std::vector empty; std::string s = transpile_expr(formula, empty, error_out); return error_out.empty() && !s.empty(); } // ============================================================================ // TQL State -> SQL DuckDB emitter. // ============================================================================ namespace { // Mapeo aggregation -> SQL DuckDB expression. std::string emit_agg_expr(const Aggregation& a) { switch (a.fn) { case AggFn::Count: return "COUNT(*)"; case AggFn::Sum: return "SUM(" + sql_ident(a.col) + ")"; case AggFn::Avg: return "AVG(" + sql_ident(a.col) + ")"; case AggFn::Min: return "MIN(" + sql_ident(a.col) + ")"; case AggFn::Max: return "MAX(" + sql_ident(a.col) + ")"; case AggFn::Distinct: return "COUNT(DISTINCT " + sql_ident(a.col) + ")"; case AggFn::Stddev: return "STDDEV(" + sql_ident(a.col) + ")"; case AggFn::Median: return "quantile_cont(" + sql_ident(a.col) + ", 0.5)"; case AggFn::P25: return "quantile_cont(" + sql_ident(a.col) + ", 0.25)"; case AggFn::P75: return "quantile_cont(" + sql_ident(a.col) + ", 0.75)"; case AggFn::P90: return "quantile_cont(" + sql_ident(a.col) + ", 0.90)"; case AggFn::P99: return "quantile_cont(" + sql_ident(a.col) + ", 0.99)"; case AggFn::Percentile: { char buf[32]; std::snprintf(buf, sizeof(buf), "%g", a.arg); return std::string("quantile_cont(") + sql_ident(a.col) + ", " + buf + ")"; } } return "/* unknown agg */ NULL"; } std::string emit_breakout_expr(const std::string& bk) { std::string col_clean; DateGranularity g = parse_breakout_granularity(bk, col_clean); if (g == DateGranularity::None) { return sql_ident(col_clean); } const char* tok = date_granularity_token(g); // Week: DuckDB date_trunc('week', col) -> monday segun configuracion. return std::string("date_trunc('") + tok + "', " + sql_ident(col_clean) + ")"; } // Resuelve un Op a operador SQL + (opcional) override de RHS. const char* sql_op(Op op) { switch (op) { case Op::Eq: return " = "; case Op::Neq: return " <> "; case Op::Gt: return " > "; case Op::Gte: return " >= "; case Op::Lt: return " < "; case Op::Lte: return " <= "; case Op::Contains: return " LIKE "; case Op::NotContains: return " NOT LIKE "; case Op::StartsWith: return " LIKE "; case Op::EndsWith: return " LIKE "; } return " = "; } // Construye RHS literal/pattern segun op + value. Devuelve placeholder '?' // y push de params; o pattern string-literal directo para LIKE wildcards. std::string emit_filter_rhs(const Filter& f, std::vector& params) { if (f.op == Op::Contains || f.op == Op::NotContains) { std::string v = "%" + f.value + "%"; params.push_back(v); return "?"; } if (f.op == Op::StartsWith) { std::string v = f.value + "%"; params.push_back(v); return "?"; } if (f.op == Op::EndsWith) { std::string v = "%" + f.value; params.push_back(v); return "?"; } params.push_back(f.value); return "?"; } // Construye CTE stage 0 (Raw): SELECT cols + derived FROM main_t [JOINs]. // `tables` provee schema. main_t name = tables[main_idx].name. Derived cols // se transpilan a SQL expression; si fuera de subset, push warning + skip col. bool emit_stage0(const State& st, const std::vector& tables, int main_idx, SqlEmit& e) { if (main_idx < 0 || main_idx >= (int)tables.size()) { e.error = "main table out of range"; return false; } const TableInput& main_t = tables[(size_t)main_idx]; // SELECT list: cols originales + derived expressions (subset). std::string select_list; for (size_t i = 0; i < main_t.headers.size(); ++i) { if (i > 0) select_list += ", "; select_list += sql_ident(main_t.headers[i]); } // Derived cols (stage 0 derived). if (!st.stages.empty()) { const Stage& s0 = st.stages[0]; for (const auto& d : s0.derived) { if (d.source_col >= 0 && d.formula.empty()) { // Retipo puro: alias col origen. if (d.source_col < (int)main_t.headers.size()) { select_list += ", " + sql_ident(main_t.headers[(size_t)d.source_col]) + " AS " + sql_ident(d.name); } continue; } std::string err; std::string expr = transpile_expr(d.formula, main_t.headers, err); if (!err.empty()) { std::string msg = "derived col '" + d.name + "' formula out of SQL subset: " + err; e.warnings.push_back(msg); // Skip col en SQL output; agente puede recurrir a TQL puro. continue; } select_list += ", " + expr + " AS " + sql_ident(d.name); } } std::string from = sql_ident(main_t.name); // Joins for (const auto& jn : st.joins) { const TableInput* right = nullptr; for (const auto& ti : tables) { if (ti.name == jn.source) { right = &ti; break; } } if (!right) { e.warnings.push_back("join source '" + jn.source + "' not in tables"); continue; } const char* strat = "LEFT JOIN"; switch (jn.strategy) { case JoinStrategy::Left: strat = "LEFT JOIN"; break; case JoinStrategy::Inner: strat = "INNER JOIN"; break; case JoinStrategy::Right: strat = "RIGHT JOIN"; break; case JoinStrategy::Full: strat = "FULL OUTER JOIN"; break; } from += "\n " + std::string(strat) + " " + sql_ident(right->name) + " AS " + sql_ident(jn.alias) + " ON "; for (size_t k = 0; k < jn.on.size(); ++k) { if (k > 0) from += " AND "; from += sql_ident(main_t.name) + "." + sql_ident(jn.on[k].first) + " = " + sql_ident(jn.alias) + "." + sql_ident(jn.on[k].second); } // Anadir cols del right al SELECT con alias.col prefix. if (jn.fields.empty()) { for (const auto& rh : right->headers) { std::string aliased = jn.alias + "." + rh; select_list += ", " + sql_ident(jn.alias) + "." + sql_ident(rh) + " AS " + sql_ident(aliased); } } else { for (const auto& fld : jn.fields) { std::string aliased = jn.alias + "." + fld; select_list += ", " + sql_ident(jn.alias) + "." + sql_ident(fld) + " AS " + sql_ident(aliased); } } } // Stage 0 WHERE: filters del Raw (filter col idx en eff_headers). // Filter.col es indice en eff_headers (orig + derived). Para SQL emit, // necesitamos resolver col idx -> col name. Reconstruir orden eff_headers. std::vector eff_headers = main_t.headers; if (!st.stages.empty()) { for (const auto& d : st.stages[0].derived) { eff_headers.push_back(d.name); } } std::string where_clause; if (!st.stages.empty()) { const Stage& s0 = st.stages[0]; for (size_t fi = 0; fi < s0.filters.size(); ++fi) { const Filter& f = s0.filters[fi]; if (f.col < 0 || f.col >= (int)eff_headers.size()) { e.warnings.push_back("stage0 filter col idx out of range"); continue; } std::string col = sql_ident(eff_headers[(size_t)f.col]); if (!where_clause.empty()) where_clause += " AND "; where_clause += col + sql_op(f.op) + emit_filter_rhs(f, e.params); } } // Stage 0 sort std::string order_clause; if (!st.stages.empty()) { const Stage& s0 = st.stages[0]; for (size_t si = 0; si < s0.sorts.size(); ++si) { const SortClause& sc = s0.sorts[si]; if (!order_clause.empty()) order_clause += ", "; order_clause += sql_ident(sc.col) + (sc.desc ? " DESC" : " ASC"); } } std::string cte = "t0 AS (\n SELECT " + select_list + "\n FROM " + from; if (!where_clause.empty()) cte += "\n WHERE " + where_clause; if (!order_clause.empty()) cte += "\n ORDER BY " + order_clause; cte += "\n)"; e.sql = "WITH " + cte; return true; } // Stage N (N>=1): SELECT breakouts + agg expressions FROM t // [WHERE filters] [GROUP BY ...] [ORDER BY ...]. bool emit_stage_n(const Stage& stg, int n, SqlEmit& e) { std::string prev = "t" + std::to_string(n - 1); std::string cur = "t" + std::to_string(n); // SELECT list: breakouts (con granularity expr si aplica) + aggregations. std::string select_list; for (size_t i = 0; i < stg.breakouts.size(); ++i) { if (i > 0) select_list += ", "; select_list += emit_breakout_expr(stg.breakouts[i]) + " AS " + sql_ident(stg.breakouts[i]); } for (size_t i = 0; i < stg.aggregations.size(); ++i) { if (!select_list.empty()) select_list += ", "; std::string alias = aggregation_alias(stg.aggregations[i]); select_list += emit_agg_expr(stg.aggregations[i]) + " AS " + sql_ident(alias); } if (select_list.empty()) select_list = "*"; // WHERE: filters del stage. col es indice en input headers (output del stage previo). // Aproximacion: usamos el nombre via stage breakouts/aggs del stage previo si fuera necesario. // Para v1, emit por nombre cuando filter.col >= 0 sea idx en breakouts/aggs/orig. El // chequeo de existencia se delega a DuckDB (errores en execute son detectables). // V1 simple: skip filter cuando no podemos resolver — caller solo deberia tener filter // sobre cols que existen. // Estrategia simple: emite WHERE solo si stage previo provee headers conocidos. Para no // duplicar logica, dejamos al caller proveer headers via filter.col que se resuelve a // breakouts[col]. // V1: si filter.col esta en rango de breakouts del stage previo, emite breakout name. // Sino, warning + skip. std::string where_clause; // Best effort: no podemos construir headers del stage previo aqui sin recomputar. // Para v1, omitimos filters de stages >=1 — caller deberia evitar usarlos via SQL. // TODO v2: pasar prev_headers para resolver. (void)where_clause; // GROUP BY: solo si hay breakouts. std::string group_clause; for (size_t i = 0; i < stg.breakouts.size(); ++i) { if (i > 0) group_clause += ", "; // Re-emit la expression para GROUP BY (no alias). group_clause += emit_breakout_expr(stg.breakouts[i]); } // ORDER BY std::string order_clause; for (size_t i = 0; i < stg.sorts.size(); ++i) { if (i > 0) order_clause += ", "; order_clause += sql_ident(stg.sorts[i].col) + (stg.sorts[i].desc ? " DESC" : " ASC"); } std::string cte = ",\n" + cur + " AS (\n SELECT " + select_list + "\n FROM " + prev; if (!group_clause.empty()) cte += "\n GROUP BY " + group_clause; if (!order_clause.empty()) cte += "\n ORDER BY " + order_clause; cte += "\n)"; e.sql += cte; return true; } } // anon SqlEmit emit_sql(const State& state, const std::vector& tables, int up_to_stage) { SqlEmit out; if (state.stages.empty()) { out.error = "state has no stages"; return out; } if (tables.empty()) { out.error = "no input tables provided"; return out; } int target = (up_to_stage < 0) ? state.active_stage : up_to_stage; if (target < 0) target = 0; if (target >= (int)state.stages.size()) target = (int)state.stages.size() - 1; // Resolve main idx via state.main_source (o tables[0] default). int main_idx = resolve_main_idx(tables, state.main_source); if (main_idx < 0) main_idx = 0; if (!emit_stage0(state, tables, main_idx, out)) return out; for (int si = 1; si <= target; ++si) { if (!emit_stage_n(state.stages[(size_t)si], si, out)) return out; } out.sql += "\nSELECT * FROM t" + std::to_string(target) + ";\n"; return out; } } // namespace tql_to_sql