100aeaa1fc
- playground/tables/CMakeLists.txt - playground/tables/data_table.cpp - playground/tables/data_table_logic.cpp - playground/tables/data_table_logic.h - playground/tables/self_test.cpp - playground/tables/tql.cpp - playground/tables/viz.cpp - playground/tables/viz.h - playground/tables/llm_anthropic.cpp - playground/tables/llm_anthropic.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
863 lines
33 KiB
C++
863 lines
33 KiB
C++
// tql_to_sql.cpp — pure walker TQL -> SQL DuckDB + Lua subset transpiler.
|
|
// Ver issue 0080. Sin DuckDB linkado.
|
|
#include "tql_to_sql.h"
|
|
|
|
#include <cctype>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <set>
|
|
#include <sstream>
|
|
#include <unordered_map>
|
|
|
|
namespace tql_to_sql {
|
|
|
|
using namespace data_table;
|
|
|
|
// ============================================================================
|
|
// Lua subset tokenizer + recursive-descent expression parser -> SQL string.
|
|
// ============================================================================
|
|
|
|
namespace {
|
|
|
|
struct Tok {
|
|
enum Kind {
|
|
EndT, NumT, StrT, IdentT, ColT,
|
|
// operators / keywords
|
|
Plus, Minus, Star, Slash, Percent, ConcatT,
|
|
Eq, Neq, Lt, Lte, Gt, Gte,
|
|
AndT, OrT, NotT,
|
|
IfT, ThenT, ElseT, EndKW,
|
|
LParen, RParen, Comma, Dot,
|
|
TrueT, FalseT, NilT,
|
|
} kind = EndT;
|
|
std::string text; // raw token texto (para idents/numbers/strings)
|
|
};
|
|
|
|
// Categorias prohibidas: token literal -> mensaje.
|
|
const std::unordered_map<std::string, const char*>& forbidden_keywords() {
|
|
static const std::unordered_map<std::string, const char*> M = {
|
|
{"function", "closures not allowed in SQL transpile subset"},
|
|
{"local", "local declarations not allowed"},
|
|
{"for", "loops not allowed"},
|
|
{"while", "loops not allowed"},
|
|
{"repeat", "loops not allowed"},
|
|
{"do", "block statements not allowed"},
|
|
{"return", "explicit return not allowed (formula is implicit expression)"},
|
|
{"goto", "goto not allowed"},
|
|
{"break", "break not allowed (no loops)"},
|
|
// io/os/debug/coroutines
|
|
{"io", "io.* access not allowed"},
|
|
{"os", "os.* access not allowed"},
|
|
{"debug", "debug.* access not allowed"},
|
|
{"package", "package access not allowed"},
|
|
{"require", "require not allowed"},
|
|
{"coroutine","coroutines not allowed"},
|
|
{"setmetatable","metatables not allowed"},
|
|
{"getmetatable","metatables not allowed"},
|
|
{"rawget", "rawget not allowed"},
|
|
{"rawset", "rawset not allowed"},
|
|
{"pcall", "pcall not allowed"},
|
|
{"xpcall", "xpcall not allowed"},
|
|
{"print", "print not allowed (SQL has no side effects)"},
|
|
};
|
|
return M;
|
|
}
|
|
|
|
// Whitelist de funciones SQL-transpilables: lua name -> SQL function template.
|
|
// Template usa $1, $2, ... como placeholders de argumentos.
|
|
struct FnMap { int min_args; int max_args; const char* sql_tmpl; };
|
|
|
|
const std::unordered_map<std::string, FnMap>& fn_whitelist() {
|
|
static const std::unordered_map<std::string, FnMap> M = {
|
|
// math.*
|
|
{"math.floor", {1, 1, "floor($1)"}},
|
|
{"math.ceil", {1, 1, "ceiling($1)"}},
|
|
{"math.abs", {1, 1, "abs($1)"}},
|
|
{"math.sqrt", {1, 1, "sqrt($1)"}},
|
|
{"math.sin", {1, 1, "sin($1)"}},
|
|
{"math.cos", {1, 1, "cos($1)"}},
|
|
{"math.log", {1, 1, "ln($1)"}},
|
|
{"math.exp", {1, 1, "exp($1)"}},
|
|
{"math.min", {2, 2, "least($1, $2)"}},
|
|
{"math.max", {2, 2, "greatest($1, $2)"}},
|
|
// string.*
|
|
{"string.upper", {1, 1, "upper($1)"}},
|
|
{"string.lower", {1, 1, "lower($1)"}},
|
|
{"string.len", {1, 1, "length($1)"}},
|
|
{"string.sub", {2, 3, "/*SUBSTRING*/"}}, // manejo especial: argc 2 vs 3
|
|
// top-level
|
|
{"tostring", {1, 1, "CAST($1 AS VARCHAR)"}},
|
|
{"tonumber", {1, 1, "CAST($1 AS DOUBLE)"}},
|
|
};
|
|
return M;
|
|
}
|
|
|
|
// Identifier SQL-safe: si tiene caracteres especiales o coincide con keyword,
|
|
// usar `"col"`. Aqui simplificado: siempre quote con dobles comillas para
|
|
// preservar case y permitir `:` (sufijo granularity).
|
|
std::string sql_ident(const std::string& name) {
|
|
std::string out;
|
|
out.reserve(name.size() + 4);
|
|
out += '"';
|
|
for (char c : name) {
|
|
if (c == '"') out += "\"\""; // escape
|
|
else out += c;
|
|
}
|
|
out += '"';
|
|
return out;
|
|
}
|
|
|
|
std::string sql_string_literal(const std::string& s) {
|
|
std::string out;
|
|
out.reserve(s.size() + 4);
|
|
out += '\'';
|
|
for (char c : s) {
|
|
if (c == '\'') out += "''";
|
|
else out += c;
|
|
}
|
|
out += '\'';
|
|
return out;
|
|
}
|
|
|
|
class Lexer {
|
|
public:
|
|
Lexer(const std::string& src) : src_(src) {}
|
|
|
|
// Devuelve true si parsea OK. False con err en error_.
|
|
bool tokenize(std::vector<Tok>& out) {
|
|
size_t i = 0;
|
|
while (i < src_.size()) {
|
|
char c = src_[i];
|
|
if (std::isspace((unsigned char)c)) { ++i; continue; }
|
|
// Lua line comment
|
|
if (c == '-' && i + 1 < src_.size() && src_[i+1] == '-') {
|
|
while (i < src_.size() && src_[i] != '\n') ++i;
|
|
continue;
|
|
}
|
|
if (c == '[' ) {
|
|
// col ref [identifier]
|
|
size_t j = i + 1;
|
|
std::string name;
|
|
while (j < src_.size() && src_[j] != ']') {
|
|
name += src_[j];
|
|
++j;
|
|
}
|
|
if (j >= src_.size()) { error_ = "unterminated [col] ref"; return false; }
|
|
Tok t; t.kind = Tok::ColT; t.text = name;
|
|
out.push_back(t);
|
|
i = j + 1;
|
|
continue;
|
|
}
|
|
if (c == '"' || c == '\'') {
|
|
char q = c;
|
|
++i;
|
|
std::string s;
|
|
while (i < src_.size() && src_[i] != q) {
|
|
if (src_[i] == '\\' && i + 1 < src_.size()) {
|
|
char esc = src_[i+1];
|
|
if (esc == 'n') s += '\n';
|
|
else if (esc == 't') s += '\t';
|
|
else if (esc == '\\') s += '\\';
|
|
else if (esc == '\'') s += '\'';
|
|
else if (esc == '"') s += '"';
|
|
else s += esc;
|
|
i += 2;
|
|
} else {
|
|
s += src_[i++];
|
|
}
|
|
}
|
|
if (i >= src_.size()) { error_ = "unterminated string literal"; return false; }
|
|
++i;
|
|
Tok t; t.kind = Tok::StrT; t.text = s;
|
|
out.push_back(t);
|
|
continue;
|
|
}
|
|
if (std::isdigit((unsigned char)c) || (c == '.' && i + 1 < src_.size() && std::isdigit((unsigned char)src_[i+1]))) {
|
|
std::string n;
|
|
bool seen_dot = false;
|
|
while (i < src_.size()) {
|
|
char d = src_[i];
|
|
if (std::isdigit((unsigned char)d)) { n += d; ++i; }
|
|
else if (d == '.' && !seen_dot) { n += d; seen_dot = true; ++i; }
|
|
else break;
|
|
}
|
|
Tok t; t.kind = Tok::NumT; t.text = n;
|
|
out.push_back(t);
|
|
continue;
|
|
}
|
|
if (std::isalpha((unsigned char)c) || c == '_') {
|
|
std::string id;
|
|
while (i < src_.size() &&
|
|
(std::isalnum((unsigned char)src_[i]) || src_[i] == '_')) {
|
|
id += src_[i++];
|
|
}
|
|
// Check forbidden keywords y mapeo a tokens.
|
|
auto& F = forbidden_keywords();
|
|
auto fit = F.find(id);
|
|
if (fit != F.end()) {
|
|
error_ = std::string("token '") + id + "': " + fit->second;
|
|
return false;
|
|
}
|
|
Tok t;
|
|
if (id == "and") t.kind = Tok::AndT;
|
|
else if (id == "or") t.kind = Tok::OrT;
|
|
else if (id == "not") t.kind = Tok::NotT;
|
|
else if (id == "if") t.kind = Tok::IfT;
|
|
else if (id == "then") t.kind = Tok::ThenT;
|
|
else if (id == "else") t.kind = Tok::ElseT;
|
|
else if (id == "end") t.kind = Tok::EndKW;
|
|
else if (id == "true") t.kind = Tok::TrueT;
|
|
else if (id == "false") t.kind = Tok::FalseT;
|
|
else if (id == "nil") t.kind = Tok::NilT;
|
|
else { t.kind = Tok::IdentT; t.text = id; }
|
|
out.push_back(t);
|
|
continue;
|
|
}
|
|
// Operators
|
|
auto emit = [&](Tok::Kind k, int len) {
|
|
Tok t; t.kind = k; out.push_back(t); i += (size_t)len;
|
|
};
|
|
if (c == '+') { emit(Tok::Plus, 1); continue; }
|
|
if (c == '-') { emit(Tok::Minus, 1); continue; }
|
|
if (c == '*') { emit(Tok::Star, 1); continue; }
|
|
if (c == '/') { emit(Tok::Slash, 1); continue; }
|
|
if (c == '%') { emit(Tok::Percent,1); continue; }
|
|
if (c == '(') { emit(Tok::LParen, 1); continue; }
|
|
if (c == ')') { emit(Tok::RParen, 1); continue; }
|
|
if (c == ',') { emit(Tok::Comma, 1); continue; }
|
|
if (c == '.') {
|
|
if (i + 1 < src_.size() && src_[i+1] == '.') {
|
|
if (i + 2 < src_.size() && src_[i+2] == '.') {
|
|
error_ = "'...' vararg not allowed"; return false;
|
|
}
|
|
emit(Tok::ConcatT, 2); continue;
|
|
}
|
|
emit(Tok::Dot, 1); continue;
|
|
}
|
|
if (c == '=') {
|
|
if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Eq, 2); continue; }
|
|
error_ = "single '=' (assignment) not allowed"; return false;
|
|
}
|
|
if (c == '~') {
|
|
if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Neq, 2); continue; }
|
|
error_ = "stray '~'"; return false;
|
|
}
|
|
if (c == '<') {
|
|
if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Lte, 2); continue; }
|
|
emit(Tok::Lt, 1); continue;
|
|
}
|
|
if (c == '>') {
|
|
if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Gte, 2); continue; }
|
|
emit(Tok::Gt, 1); continue;
|
|
}
|
|
if (c == '{') { error_ = "table literals '{...}' not allowed"; return false; }
|
|
if (c == '}') { error_ = "stray '}'"; return false; }
|
|
if (c == ';') { error_ = "multi-statement not allowed"; return false; }
|
|
if (c == '#') { error_ = "length '#' operator not allowed"; return false; }
|
|
if (c == ':') { error_ = "method calls ':' not allowed"; return false; }
|
|
error_ = std::string("unexpected character '") + c + "'";
|
|
return false;
|
|
}
|
|
Tok t; t.kind = Tok::EndT;
|
|
out.push_back(t);
|
|
return true;
|
|
}
|
|
|
|
const std::string& error() const { return error_; }
|
|
private:
|
|
const std::string& src_;
|
|
std::string error_;
|
|
};
|
|
|
|
class Parser {
|
|
public:
|
|
Parser(const std::vector<Tok>& toks,
|
|
const std::vector<std::string>& headers)
|
|
: toks_(toks), headers_(headers) {}
|
|
|
|
// expr := ternary
|
|
// ternary := if/then/else | logic_or
|
|
bool parse_expr(std::string& out) {
|
|
return parse_ternary(out);
|
|
}
|
|
|
|
bool parse_ternary(std::string& out) {
|
|
if (peek(0).kind == Tok::IfT) {
|
|
++pos_;
|
|
std::string a, b, c;
|
|
if (!parse_logic_or(a)) return false;
|
|
if (!eat(Tok::ThenT, "'then' expected after 'if'")) return false;
|
|
if (!parse_ternary(b)) return false;
|
|
if (!eat(Tok::ElseT, "'else' expected (subset requires else branch)")) return false;
|
|
if (!parse_ternary(c)) return false;
|
|
if (!eat(Tok::EndKW, "'end' expected to close 'if'")) return false;
|
|
out = "CASE WHEN " + a + " THEN " + b + " ELSE " + c + " END";
|
|
return true;
|
|
}
|
|
return parse_logic_or(out);
|
|
}
|
|
|
|
bool parse_logic_or(std::string& out) {
|
|
if (!parse_logic_and(out)) return false;
|
|
while (peek(0).kind == Tok::OrT) {
|
|
++pos_;
|
|
std::string rhs;
|
|
if (!parse_logic_and(rhs)) return false;
|
|
out = "(" + out + " OR " + rhs + ")";
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool parse_logic_and(std::string& out) {
|
|
if (!parse_not(out)) return false;
|
|
while (peek(0).kind == Tok::AndT) {
|
|
++pos_;
|
|
std::string rhs;
|
|
if (!parse_not(rhs)) return false;
|
|
out = "(" + out + " AND " + rhs + ")";
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool parse_not(std::string& out) {
|
|
if (peek(0).kind == Tok::NotT) {
|
|
++pos_;
|
|
std::string e;
|
|
if (!parse_not(e)) return false;
|
|
out = "NOT (" + e + ")";
|
|
return true;
|
|
}
|
|
return parse_comparison(out);
|
|
}
|
|
|
|
bool parse_comparison(std::string& out) {
|
|
if (!parse_concat(out)) return false;
|
|
while (true) {
|
|
Tok::Kind k = peek(0).kind;
|
|
const char* op = nullptr;
|
|
if (k == Tok::Eq) op = " = ";
|
|
else if (k == Tok::Neq) op = " <> ";
|
|
else if (k == Tok::Lt) op = " < ";
|
|
else if (k == Tok::Lte) op = " <= ";
|
|
else if (k == Tok::Gt) op = " > ";
|
|
else if (k == Tok::Gte) op = " >= ";
|
|
else break;
|
|
++pos_;
|
|
std::string rhs;
|
|
if (!parse_concat(rhs)) return false;
|
|
out = "(" + out + op + rhs + ")";
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool parse_concat(std::string& out) {
|
|
if (!parse_additive(out)) return false;
|
|
while (peek(0).kind == Tok::ConcatT) {
|
|
++pos_;
|
|
std::string rhs;
|
|
if (!parse_additive(rhs)) return false;
|
|
out = "(" + out + " || " + rhs + ")";
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool parse_additive(std::string& out) {
|
|
if (!parse_multiplicative(out)) return false;
|
|
while (peek(0).kind == Tok::Plus || peek(0).kind == Tok::Minus) {
|
|
const char* op = (peek(0).kind == Tok::Plus) ? " + " : " - ";
|
|
++pos_;
|
|
std::string rhs;
|
|
if (!parse_multiplicative(rhs)) return false;
|
|
out = "(" + out + op + rhs + ")";
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool parse_multiplicative(std::string& out) {
|
|
if (!parse_unary(out)) return false;
|
|
while (peek(0).kind == Tok::Star || peek(0).kind == Tok::Slash || peek(0).kind == Tok::Percent) {
|
|
const char* op = (peek(0).kind == Tok::Star) ? " * "
|
|
: (peek(0).kind == Tok::Slash) ? " / " : " % ";
|
|
++pos_;
|
|
std::string rhs;
|
|
if (!parse_unary(rhs)) return false;
|
|
out = "(" + out + op + rhs + ")";
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool parse_unary(std::string& out) {
|
|
if (peek(0).kind == Tok::Minus) {
|
|
++pos_;
|
|
std::string e;
|
|
if (!parse_unary(e)) return false;
|
|
out = "(-" + e + ")";
|
|
return true;
|
|
}
|
|
return parse_primary(out);
|
|
}
|
|
|
|
bool parse_primary(std::string& out) {
|
|
Tok t = peek(0);
|
|
if (t.kind == Tok::NumT) {
|
|
++pos_;
|
|
out = t.text;
|
|
return true;
|
|
}
|
|
if (t.kind == Tok::StrT) {
|
|
++pos_;
|
|
out = sql_string_literal(t.text);
|
|
return true;
|
|
}
|
|
if (t.kind == Tok::TrueT) { ++pos_; out = "TRUE"; return true; }
|
|
if (t.kind == Tok::FalseT) { ++pos_; out = "FALSE"; return true; }
|
|
if (t.kind == Tok::NilT) { ++pos_; out = "NULL"; return true; }
|
|
if (t.kind == Tok::ColT) {
|
|
// Check col exists (warning, not error).
|
|
++pos_;
|
|
(void)headers_; // currently not validating — caller can do that
|
|
out = sql_ident(t.text);
|
|
return true;
|
|
}
|
|
if (t.kind == Tok::LParen) {
|
|
++pos_;
|
|
std::string e;
|
|
if (!parse_expr(e)) return false;
|
|
if (!eat(Tok::RParen, "expected ')'")) return false;
|
|
out = "(" + e + ")";
|
|
return true;
|
|
}
|
|
if (t.kind == Tok::IdentT) {
|
|
// Function call: identifier ("." identifier)? "(" args ")"
|
|
std::string name = t.text;
|
|
++pos_;
|
|
if (peek(0).kind == Tok::Dot) {
|
|
++pos_;
|
|
if (peek(0).kind != Tok::IdentT) {
|
|
error_ = "expected identifier after '.'";
|
|
return false;
|
|
}
|
|
name += "." + peek(0).text;
|
|
++pos_;
|
|
}
|
|
if (peek(0).kind != Tok::LParen) {
|
|
error_ = "bare identifier '" + name +
|
|
"' not allowed (only [col] refs + whitelisted fn calls)";
|
|
return false;
|
|
}
|
|
++pos_; // consume '('
|
|
std::vector<std::string> args;
|
|
if (peek(0).kind != Tok::RParen) {
|
|
while (true) {
|
|
std::string a;
|
|
if (!parse_expr(a)) return false;
|
|
args.push_back(a);
|
|
if (peek(0).kind == Tok::Comma) { ++pos_; continue; }
|
|
break;
|
|
}
|
|
}
|
|
if (!eat(Tok::RParen, "expected ')' closing function args")) return false;
|
|
// Validate against whitelist
|
|
auto& W = fn_whitelist();
|
|
auto wit = W.find(name);
|
|
if (wit == W.end()) {
|
|
error_ = "function '" + name +
|
|
"' not in SQL transpile whitelist (math.*, string.upper/lower/len/sub, tostring, tonumber)";
|
|
return false;
|
|
}
|
|
const FnMap& fm = wit->second;
|
|
if ((int)args.size() < fm.min_args || (int)args.size() > fm.max_args) {
|
|
std::ostringstream os;
|
|
os << "function '" << name << "' takes " << fm.min_args;
|
|
if (fm.max_args != fm.min_args) os << ".." << fm.max_args;
|
|
os << " args, got " << args.size();
|
|
error_ = os.str();
|
|
return false;
|
|
}
|
|
// Casos especiales
|
|
if (name == "string.sub") {
|
|
// Lua: string.sub(s, i [, j]) — i/j 1-based, inclusive.
|
|
// SQL DuckDB: substring(s, i, count). count = j - i + 1.
|
|
if (args.size() == 2) {
|
|
// sin j -> hasta el final. DuckDB substring(s, i) acepta.
|
|
out = "substring(" + args[0] + ", " + args[1] + ")";
|
|
} else {
|
|
out = "substring(" + args[0] + ", " + args[1] +
|
|
", (" + args[2] + ") - (" + args[1] + ") + 1)";
|
|
}
|
|
return true;
|
|
}
|
|
// Generico: substituir $1..$N en template.
|
|
std::string s = fm.sql_tmpl;
|
|
for (int i = 0; i < (int)args.size(); ++i) {
|
|
char ph[6];
|
|
std::snprintf(ph, sizeof(ph), "$%d", i + 1);
|
|
std::string p = ph;
|
|
size_t at = 0;
|
|
while ((at = s.find(p, at)) != std::string::npos) {
|
|
s.replace(at, p.size(), args[i]);
|
|
at += args[i].size();
|
|
}
|
|
}
|
|
out = s;
|
|
return true;
|
|
}
|
|
error_ = std::string("unexpected token in expression");
|
|
return false;
|
|
}
|
|
|
|
bool eat(Tok::Kind k, const char* msg) {
|
|
if (peek(0).kind != k) { error_ = msg; return false; }
|
|
++pos_;
|
|
return true;
|
|
}
|
|
|
|
const Tok& peek(int off) const {
|
|
size_t i = pos_ + (size_t)off;
|
|
if (i >= toks_.size()) return toks_.back();
|
|
return toks_[i];
|
|
}
|
|
|
|
bool at_end() const { return peek(0).kind == Tok::EndT; }
|
|
const std::string& error() const { return error_; }
|
|
|
|
private:
|
|
const std::vector<Tok>& toks_;
|
|
const std::vector<std::string>& headers_;
|
|
size_t pos_ = 0;
|
|
std::string error_;
|
|
};
|
|
|
|
} // anon
|
|
|
|
std::string transpile_expr(const std::string& formula,
|
|
const std::vector<std::string>& in_headers,
|
|
std::string& error_out) {
|
|
error_out.clear();
|
|
std::vector<Tok> toks;
|
|
Lexer lex(formula);
|
|
if (!lex.tokenize(toks)) {
|
|
error_out = lex.error();
|
|
return "";
|
|
}
|
|
Parser p(toks, in_headers);
|
|
std::string out;
|
|
if (!p.parse_expr(out)) {
|
|
error_out = p.error();
|
|
return "";
|
|
}
|
|
if (!p.at_end()) {
|
|
error_out = "unexpected trailing tokens after expression";
|
|
return "";
|
|
}
|
|
return out;
|
|
}
|
|
|
|
bool is_transpilable(const std::string& formula, std::string& error_out) {
|
|
std::vector<std::string> empty;
|
|
std::string s = transpile_expr(formula, empty, error_out);
|
|
return error_out.empty() && !s.empty();
|
|
}
|
|
|
|
// ============================================================================
|
|
// TQL State -> SQL DuckDB emitter.
|
|
// ============================================================================
|
|
|
|
namespace {
|
|
|
|
// Mapeo aggregation -> SQL DuckDB expression.
|
|
std::string emit_agg_expr(const Aggregation& a) {
|
|
switch (a.fn) {
|
|
case AggFn::Count: return "COUNT(*)";
|
|
case AggFn::Sum: return "SUM(" + sql_ident(a.col) + ")";
|
|
case AggFn::Avg: return "AVG(" + sql_ident(a.col) + ")";
|
|
case AggFn::Min: return "MIN(" + sql_ident(a.col) + ")";
|
|
case AggFn::Max: return "MAX(" + sql_ident(a.col) + ")";
|
|
case AggFn::Distinct: return "COUNT(DISTINCT " + sql_ident(a.col) + ")";
|
|
case AggFn::Stddev: return "STDDEV(" + sql_ident(a.col) + ")";
|
|
case AggFn::Median: return "quantile_cont(" + sql_ident(a.col) + ", 0.5)";
|
|
case AggFn::P25: return "quantile_cont(" + sql_ident(a.col) + ", 0.25)";
|
|
case AggFn::P75: return "quantile_cont(" + sql_ident(a.col) + ", 0.75)";
|
|
case AggFn::P90: return "quantile_cont(" + sql_ident(a.col) + ", 0.90)";
|
|
case AggFn::P99: return "quantile_cont(" + sql_ident(a.col) + ", 0.99)";
|
|
case AggFn::Percentile: {
|
|
char buf[32];
|
|
std::snprintf(buf, sizeof(buf), "%g", a.arg);
|
|
return std::string("quantile_cont(") + sql_ident(a.col) + ", " + buf + ")";
|
|
}
|
|
}
|
|
return "/* unknown agg */ NULL";
|
|
}
|
|
|
|
std::string emit_breakout_expr(const std::string& bk) {
|
|
std::string col_clean;
|
|
DateGranularity g = parse_breakout_granularity(bk, col_clean);
|
|
if (g == DateGranularity::None) {
|
|
return sql_ident(col_clean);
|
|
}
|
|
const char* tok = date_granularity_token(g);
|
|
// Week: DuckDB date_trunc('week', col) -> monday segun configuracion.
|
|
return std::string("date_trunc('") + tok + "', " + sql_ident(col_clean) + ")";
|
|
}
|
|
|
|
// Resuelve un Op a operador SQL + (opcional) override de RHS.
|
|
const char* sql_op(Op op) {
|
|
switch (op) {
|
|
case Op::Eq: return " = ";
|
|
case Op::Neq: return " <> ";
|
|
case Op::Gt: return " > ";
|
|
case Op::Gte: return " >= ";
|
|
case Op::Lt: return " < ";
|
|
case Op::Lte: return " <= ";
|
|
case Op::Contains: return " LIKE ";
|
|
case Op::NotContains: return " NOT LIKE ";
|
|
case Op::StartsWith: return " LIKE ";
|
|
case Op::EndsWith: return " LIKE ";
|
|
}
|
|
return " = ";
|
|
}
|
|
|
|
// Construye RHS literal/pattern segun op + value. Devuelve placeholder '?'
|
|
// y push de params; o pattern string-literal directo para LIKE wildcards.
|
|
std::string emit_filter_rhs(const Filter& f, std::vector<std::string>& params) {
|
|
if (f.op == Op::Contains || f.op == Op::NotContains) {
|
|
std::string v = "%" + f.value + "%";
|
|
params.push_back(v);
|
|
return "?";
|
|
}
|
|
if (f.op == Op::StartsWith) {
|
|
std::string v = f.value + "%";
|
|
params.push_back(v);
|
|
return "?";
|
|
}
|
|
if (f.op == Op::EndsWith) {
|
|
std::string v = "%" + f.value;
|
|
params.push_back(v);
|
|
return "?";
|
|
}
|
|
params.push_back(f.value);
|
|
return "?";
|
|
}
|
|
|
|
// Construye CTE stage 0 (Raw): SELECT cols + derived FROM main_t [JOINs].
|
|
// `tables` provee schema. main_t name = tables[main_idx].name. Derived cols
|
|
// se transpilan a SQL expression; si fuera de subset, push warning + skip col.
|
|
bool emit_stage0(const State& st, const std::vector<TableInput>& tables,
|
|
int main_idx, SqlEmit& e) {
|
|
if (main_idx < 0 || main_idx >= (int)tables.size()) {
|
|
e.error = "main table out of range";
|
|
return false;
|
|
}
|
|
const TableInput& main_t = tables[(size_t)main_idx];
|
|
|
|
// SELECT list: cols originales + derived expressions (subset).
|
|
std::string select_list;
|
|
for (size_t i = 0; i < main_t.headers.size(); ++i) {
|
|
if (i > 0) select_list += ", ";
|
|
select_list += sql_ident(main_t.headers[i]);
|
|
}
|
|
|
|
// Derived cols (stage 0 derived).
|
|
if (!st.stages.empty()) {
|
|
const Stage& s0 = st.stages[0];
|
|
for (const auto& d : s0.derived) {
|
|
if (d.source_col >= 0 && d.formula.empty()) {
|
|
// Retipo puro: alias col origen.
|
|
if (d.source_col < (int)main_t.headers.size()) {
|
|
select_list += ", " + sql_ident(main_t.headers[(size_t)d.source_col])
|
|
+ " AS " + sql_ident(d.name);
|
|
}
|
|
continue;
|
|
}
|
|
std::string err;
|
|
std::string expr = transpile_expr(d.formula, main_t.headers, err);
|
|
if (!err.empty()) {
|
|
std::string msg = "derived col '" + d.name +
|
|
"' formula out of SQL subset: " + err;
|
|
e.warnings.push_back(msg);
|
|
// Skip col en SQL output; agente puede recurrir a TQL puro.
|
|
continue;
|
|
}
|
|
select_list += ", " + expr + " AS " + sql_ident(d.name);
|
|
}
|
|
}
|
|
|
|
std::string from = sql_ident(main_t.name);
|
|
|
|
// Joins
|
|
for (const auto& jn : st.joins) {
|
|
const TableInput* right = nullptr;
|
|
for (const auto& ti : tables) {
|
|
if (ti.name == jn.source) { right = &ti; break; }
|
|
}
|
|
if (!right) {
|
|
e.warnings.push_back("join source '" + jn.source + "' not in tables");
|
|
continue;
|
|
}
|
|
const char* strat = "LEFT JOIN";
|
|
switch (jn.strategy) {
|
|
case JoinStrategy::Left: strat = "LEFT JOIN"; break;
|
|
case JoinStrategy::Inner: strat = "INNER JOIN"; break;
|
|
case JoinStrategy::Right: strat = "RIGHT JOIN"; break;
|
|
case JoinStrategy::Full: strat = "FULL OUTER JOIN"; break;
|
|
}
|
|
from += "\n " + std::string(strat) + " " + sql_ident(right->name)
|
|
+ " AS " + sql_ident(jn.alias) + " ON ";
|
|
for (size_t k = 0; k < jn.on.size(); ++k) {
|
|
if (k > 0) from += " AND ";
|
|
from += sql_ident(main_t.name) + "." + sql_ident(jn.on[k].first)
|
|
+ " = " + sql_ident(jn.alias) + "." + sql_ident(jn.on[k].second);
|
|
}
|
|
// Anadir cols del right al SELECT con alias.col prefix.
|
|
if (jn.fields.empty()) {
|
|
for (const auto& rh : right->headers) {
|
|
std::string aliased = jn.alias + "." + rh;
|
|
select_list += ", " + sql_ident(jn.alias) + "." + sql_ident(rh)
|
|
+ " AS " + sql_ident(aliased);
|
|
}
|
|
} else {
|
|
for (const auto& fld : jn.fields) {
|
|
std::string aliased = jn.alias + "." + fld;
|
|
select_list += ", " + sql_ident(jn.alias) + "." + sql_ident(fld)
|
|
+ " AS " + sql_ident(aliased);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Stage 0 WHERE: filters del Raw (filter col idx en eff_headers).
|
|
// Filter.col es indice en eff_headers (orig + derived). Para SQL emit,
|
|
// necesitamos resolver col idx -> col name. Reconstruir orden eff_headers.
|
|
std::vector<std::string> eff_headers = main_t.headers;
|
|
if (!st.stages.empty()) {
|
|
for (const auto& d : st.stages[0].derived) {
|
|
eff_headers.push_back(d.name);
|
|
}
|
|
}
|
|
std::string where_clause;
|
|
if (!st.stages.empty()) {
|
|
const Stage& s0 = st.stages[0];
|
|
for (size_t fi = 0; fi < s0.filters.size(); ++fi) {
|
|
const Filter& f = s0.filters[fi];
|
|
if (f.col < 0 || f.col >= (int)eff_headers.size()) {
|
|
e.warnings.push_back("stage0 filter col idx out of range");
|
|
continue;
|
|
}
|
|
std::string col = sql_ident(eff_headers[(size_t)f.col]);
|
|
if (!where_clause.empty()) where_clause += " AND ";
|
|
where_clause += col + sql_op(f.op) + emit_filter_rhs(f, e.params);
|
|
}
|
|
}
|
|
|
|
// Stage 0 sort
|
|
std::string order_clause;
|
|
if (!st.stages.empty()) {
|
|
const Stage& s0 = st.stages[0];
|
|
for (size_t si = 0; si < s0.sorts.size(); ++si) {
|
|
const SortClause& sc = s0.sorts[si];
|
|
if (!order_clause.empty()) order_clause += ", ";
|
|
order_clause += sql_ident(sc.col) + (sc.desc ? " DESC" : " ASC");
|
|
}
|
|
}
|
|
|
|
std::string cte = "t0 AS (\n SELECT " + select_list + "\n FROM " + from;
|
|
if (!where_clause.empty()) cte += "\n WHERE " + where_clause;
|
|
if (!order_clause.empty()) cte += "\n ORDER BY " + order_clause;
|
|
cte += "\n)";
|
|
e.sql = "WITH " + cte;
|
|
return true;
|
|
}
|
|
|
|
// Stage N (N>=1): SELECT breakouts + agg expressions FROM t<N-1>
|
|
// [WHERE filters] [GROUP BY ...] [ORDER BY ...].
|
|
bool emit_stage_n(const Stage& stg, int n, SqlEmit& e) {
|
|
std::string prev = "t" + std::to_string(n - 1);
|
|
std::string cur = "t" + std::to_string(n);
|
|
|
|
// SELECT list: breakouts (con granularity expr si aplica) + aggregations.
|
|
std::string select_list;
|
|
for (size_t i = 0; i < stg.breakouts.size(); ++i) {
|
|
if (i > 0) select_list += ", ";
|
|
select_list += emit_breakout_expr(stg.breakouts[i])
|
|
+ " AS " + sql_ident(stg.breakouts[i]);
|
|
}
|
|
for (size_t i = 0; i < stg.aggregations.size(); ++i) {
|
|
if (!select_list.empty()) select_list += ", ";
|
|
std::string alias = aggregation_alias(stg.aggregations[i]);
|
|
select_list += emit_agg_expr(stg.aggregations[i]) + " AS " + sql_ident(alias);
|
|
}
|
|
if (select_list.empty()) select_list = "*";
|
|
|
|
// WHERE: filters del stage. col es indice en input headers (output del stage previo).
|
|
// Aproximacion: usamos el nombre via stage breakouts/aggs del stage previo si fuera necesario.
|
|
// Para v1, emit por nombre cuando filter.col >= 0 sea idx en breakouts/aggs/orig. El
|
|
// chequeo de existencia se delega a DuckDB (errores en execute son detectables).
|
|
// V1 simple: skip filter cuando no podemos resolver — caller solo deberia tener filter
|
|
// sobre cols que existen.
|
|
// Estrategia simple: emite WHERE solo si stage previo provee headers conocidos. Para no
|
|
// duplicar logica, dejamos al caller proveer headers via filter.col que se resuelve a
|
|
// breakouts[col].
|
|
// V1: si filter.col esta en rango de breakouts del stage previo, emite breakout name.
|
|
// Sino, warning + skip.
|
|
std::string where_clause;
|
|
// Best effort: no podemos construir headers del stage previo aqui sin recomputar.
|
|
// Para v1, omitimos filters de stages >=1 — caller deberia evitar usarlos via SQL.
|
|
// TODO v2: pasar prev_headers para resolver.
|
|
(void)where_clause;
|
|
|
|
// GROUP BY: solo si hay breakouts.
|
|
std::string group_clause;
|
|
for (size_t i = 0; i < stg.breakouts.size(); ++i) {
|
|
if (i > 0) group_clause += ", ";
|
|
// Re-emit la expression para GROUP BY (no alias).
|
|
group_clause += emit_breakout_expr(stg.breakouts[i]);
|
|
}
|
|
|
|
// ORDER BY
|
|
std::string order_clause;
|
|
for (size_t i = 0; i < stg.sorts.size(); ++i) {
|
|
if (i > 0) order_clause += ", ";
|
|
order_clause += sql_ident(stg.sorts[i].col) + (stg.sorts[i].desc ? " DESC" : " ASC");
|
|
}
|
|
|
|
std::string cte = ",\n" + cur + " AS (\n SELECT " + select_list
|
|
+ "\n FROM " + prev;
|
|
if (!group_clause.empty()) cte += "\n GROUP BY " + group_clause;
|
|
if (!order_clause.empty()) cte += "\n ORDER BY " + order_clause;
|
|
cte += "\n)";
|
|
e.sql += cte;
|
|
return true;
|
|
}
|
|
|
|
} // anon
|
|
|
|
SqlEmit emit_sql(const State& state,
|
|
const std::vector<TableInput>& tables,
|
|
int up_to_stage) {
|
|
SqlEmit out;
|
|
if (state.stages.empty()) {
|
|
out.error = "state has no stages";
|
|
return out;
|
|
}
|
|
if (tables.empty()) {
|
|
out.error = "no input tables provided";
|
|
return out;
|
|
}
|
|
int target = (up_to_stage < 0) ? state.active_stage : up_to_stage;
|
|
if (target < 0) target = 0;
|
|
if (target >= (int)state.stages.size()) target = (int)state.stages.size() - 1;
|
|
|
|
// Resolve main idx via state.main_source (o tables[0] default).
|
|
int main_idx = resolve_main_idx(tables, state.main_source);
|
|
if (main_idx < 0) main_idx = 0;
|
|
|
|
if (!emit_stage0(state, tables, main_idx, out)) return out;
|
|
for (int si = 1; si <= target; ++si) {
|
|
if (!emit_stage_n(state.stages[(size_t)si], si, out)) return out;
|
|
}
|
|
out.sql += "\nSELECT * FROM t" + std::to_string(target) + ";\n";
|
|
return out;
|
|
}
|
|
|
|
} // namespace tql_to_sql
|