// NOTE(review): the two standard includes below lost their targets in the
// extracted text; <cctype> and <string> are what the helpers in this
// translation unit use (std::isspace, std::string) — confirm against the repo.
#include <cctype>
#include <string>

namespace fn_ui {
namespace {

// Returns the leading keyword of `s`, upper-cased, reading from index `start`.
//
// Characters are consumed while they are ASCII letters; the scan stops at the
// first character that is anything else (whitespace, digit, punctuation, any
// byte >= 0x80) or at the end of the string. The caller is expected to have
// already positioned `start` on the token (no leading whitespace is skipped).
//
// Returns an empty string when `start` is out of range or s[start] is not an
// ASCII letter.
//
// The classification is done with explicit ASCII ranges rather than
// std::isalpha/std::toupper so the result does not vary with the process-wide
// C locale: SQL keywords are plain ASCII and callers rely on the same input
// always producing the same output.
std::string first_token_upper(const std::string& s, size_t start) {
    std::string out;
    for (; start < s.size(); ++start) {
        const char c = s[start];
        if (c >= 'a' && c <= 'z') {
            out.push_back(static_cast<char>(c - 'a' + 'A'));
        } else if (c >= 'A' && c <= 'Z') {
            out.push_back(c);
        } else {
            break;  // keywords are letters only
        }
    }
    return out;
}

}  // namespace
}  // namespace fn_ui
+size_t skip_ws_and_comments(const std::string& s, size_t i) { + while (i < s.size()) { + unsigned char c = static_cast(s[i]); + if (std::isspace(c)) { ++i; continue; } + if (c == '-' && i + 1 < s.size() && s[i + 1] == '-') { + // line comment hasta \n + i += 2; + while (i < s.size() && s[i] != '\n') ++i; + continue; + } + if (c == '/' && i + 1 < s.size() && s[i + 1] == '*') { + // block comment hasta */ + i += 2; + while (i + 1 < s.size() && !(s[i] == '*' && s[i + 1] == '/')) ++i; + if (i + 1 < s.size()) i += 2; + else i = s.size(); + continue; + } + break; + } + return i; +} + +std::string trim(const std::string& s) { + size_t a = 0, b = s.size(); + while (a < b && std::isspace(static_cast(s[a]))) ++a; + while (b > a && std::isspace(static_cast(s[b - 1]))) --b; + return s.substr(a, b - a); +} + +} // namespace + +SqlStmtKind sql_classify(const std::string& stmt) { + size_t i = skip_ws_and_comments(stmt, 0); + if (i >= stmt.size()) return SqlStmtKind::Unknown; + std::string head = first_token_upper(stmt, i); + if (head == "SELECT" || head == "WITH") return SqlStmtKind::Select; + if (head == "INSERT") return SqlStmtKind::Insert; + if (head == "UPDATE") return SqlStmtKind::Update; + if (head == "DELETE") return SqlStmtKind::Delete; + if (head == "CREATE") return SqlStmtKind::Create; + if (head == "DROP") return SqlStmtKind::Drop; + if (head == "ALTER") return SqlStmtKind::Alter; + if (head == "PRAGMA") return SqlStmtKind::Pragma; + if (head == "EXPLAIN") return SqlStmtKind::Explain; + return SqlStmtKind::Unknown; +} + +std::vector sql_parse(const std::string& input) { + std::vector out; + + enum class Mode { Normal, LineComment, BlockComment, SingleStr, DoubleStr, BackTick }; + Mode m = Mode::Normal; + + size_t stmt_start = 0; + int cur_line = 1; + int stmt_line = 1; + bool stmt_has_content = false; + + auto flush = [&](size_t end) { + std::string raw = input.substr(stmt_start, end - stmt_start); + std::string t = trim(raw); + if (!t.empty()) { + SqlStatement s; + 
s.text = t; + s.kind = sql_classify(t); + s.line = stmt_line; + out.push_back(std::move(s)); + } + }; + + for (size_t i = 0; i < input.size(); ++i) { + char c = input[i]; + char n = (i + 1 < input.size()) ? input[i + 1] : '\0'; + if (c == '\n') ++cur_line; + + switch (m) { + case Mode::Normal: + if (!stmt_has_content && !std::isspace(static_cast(c))) { + // marca inicio "real" de un statement (despues de skip ws/comments) + stmt_line = cur_line; + stmt_has_content = true; + } + if (c == '-' && n == '-') { + m = Mode::LineComment; + ++i; + if (n == '\n') ++cur_line; // (no aplica, n es '-') + } else if (c == '/' && n == '*') { + m = Mode::BlockComment; + ++i; + } else if (c == '\'') { + m = Mode::SingleStr; + } else if (c == '"') { + m = Mode::DoubleStr; + } else if (c == '`') { + m = Mode::BackTick; + } else if (c == ';') { + flush(i); + stmt_start = i + 1; + stmt_has_content = false; + } + break; + + case Mode::LineComment: + if (c == '\n') m = Mode::Normal; + break; + + case Mode::BlockComment: + if (c == '*' && n == '/') { + m = Mode::Normal; + ++i; + } + break; + + case Mode::SingleStr: + if (c == '\'') { + // SQL escapa '' como literal, sigue dentro de la cadena. + if (n == '\'') { ++i; } + else m = Mode::Normal; + } + break; + + case Mode::DoubleStr: + if (c == '"') { + if (n == '"') { ++i; } + else m = Mode::Normal; + } + break; + + case Mode::BackTick: + if (c == '`') { + if (n == '`') { ++i; } + else m = Mode::Normal; + } + break; + } + } + + // ultimo statement sin ';' final + if (stmt_start < input.size()) { + flush(input.size()); + } + + return out; +} + +} // namespace fn_ui diff --git a/cpp/functions/core/sql_parse.h b/cpp/functions/core/sql_parse.h new file mode 100644 index 00000000..4bfd380d --- /dev/null +++ b/cpp/functions/core/sql_parse.h @@ -0,0 +1,48 @@ +#pragma once + +// sql_parse — tokenizer y clasificador de statements SQL (logica pura). 
// Splits a multi-statement SQL string on ';' (outside string literals and
// comments) and classifies each statement by its leading keyword. Nothing is
// executed.
//
// Typical use:
//
//   auto stmts = fn_ui::sql_parse("SELECT 1; INSERT INTO t VALUES (2);");
//   for (auto& s : stmts) {
//       switch (s.kind) { ... }
//   }

#include <string>
#include <vector>

namespace fn_ui {

// Coarse statement category, decided from the statement's first keyword.
enum class SqlStmtKind {
    Unknown,  // no recognised leading keyword
    Select,   // SELECT, and also WITH
    Insert,
    Update,
    Delete,
    Create,
    Drop,
    Alter,
    Pragma,
    Explain,
};

// One statement extracted from a multi-statement input.
struct SqlStatement {
    SqlStmtKind kind = SqlStmtKind::Unknown;
    std::string text;  // whitespace-trimmed statement text, without the terminating ';'
    int line = 1;      // 1-based line of the input on which the statement starts
};

// Tokenizes multi-statement SQL. Skips over '...', "..." and `...` literals
// and over `-- line` and `/* block */` comments when looking for ';'
// separators. Returns the non-empty statements in input order.
std::vector<SqlStatement> sql_parse(const std::string& input);

// Classifies a single statement by its leading keyword (case-insensitive),
// after skipping leading whitespace and comments.
SqlStmtKind sql_classify(const std::string& stmt);

}  // namespace fn_ui
+tags: [sql, parser, tokenizer, pure, sqlite] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: true +tests: ["sql_parse classifies common statements", "sql_parse handles strings and comments", "sql_parse trims and ignores empty"] +test_file_path: "cpp/tests/test_sql_parse.cpp" +file_path: "cpp/functions/core/sql_parse.cpp" +params: + - name: input + desc: "Texto SQL completo, posiblemente multi-statement, con ';' opcional al final" + - name: stmt + desc: "(sql_classify) Un solo statement ya separado, sin ';' final" +output: "sql_parse: vector con un SqlStatement por statement no vacio (kind, text trimeado, line 1-based de inicio en el input). sql_classify: SqlStmtKind segun la primera keyword del statement." +--- + +# sql_parse + +Logica pura para entender un script SQL antes de pasarlo al motor: separar +statements y clasificarlos. Sin estado, sin I/O, sin SQLite. Reutilizable +desde `sql_workbench` y desde cualquier app/CLI que necesite distinguir +SELECT de DDL para mostrar info al usuario o decidir como ejecutar. + +## API + +```cpp +namespace fn_ui { + +enum class SqlStmtKind { + Unknown, Select, Insert, Update, Delete, Create, Drop, Alter, Pragma, Explain +}; + +struct SqlStatement { + SqlStmtKind kind; + std::string text; // texto trimeado + int line; // linea de inicio en el input (1-based) +}; + +std::vector<SqlStatement> sql_parse(const std::string& input); +SqlStmtKind sql_classify(const std::string& stmt); + +} +``` + +## Reglas del tokenizer + +- Strings: `'...'`, `"..."` y `` `...` `` se saltan enteras. Soporta el escape + SQL estandar de doblar la quote (`'don''t'`, `"a""b"`). +- Comentarios: `-- linea` hasta `\n` y `/* bloque */`. +- El separador `;` solo divide cuando aparece en modo Normal (fuera de + strings/comments). +- Statements vacios (`;;`, `; ;`) se descartan tras trimear. +- Si el ultimo statement no termina en `;`, se incluye igualmente.
+ +## Clasificacion + +`sql_classify` mira la primera palabra alfabetica despues de saltar whitespace +y comentarios. `WITH` se clasifica como `Select` porque es la forma comun de +iniciar CTEs que devuelven filas. + +## Por que pura + +No abre conexiones, no toca SQLite, no consulta el reloj. Misma entrada → misma +salida. Esto permite testearla sin depender de un fixture de DB.