100aeaa1fc
- playground/tables/CMakeLists.txt - playground/tables/data_table.cpp - playground/tables/data_table_logic.cpp - playground/tables/data_table_logic.h - playground/tables/self_test.cpp - playground/tables/tql.cpp - playground/tables/viz.cpp - playground/tables/viz.h - playground/tables/llm_anthropic.cpp - playground/tables/llm_anthropic.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
296 lines
11 KiB
C++
296 lines
11 KiB
C++
// llm_anthropic.cpp — minimal Anthropic client that shells out to the curl CLI.
// See issue 0080.
|
|
#include "llm_anthropic.h"

#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <string>
|
|
|
|
namespace llm_anthropic {
|
|
|
|
using namespace data_table;
|
|
|
|
namespace {
|
|
|
|
// Minimal JSON string escaper.
//
// Returns a copy of `s` that can be placed between double quotes in a JSON
// document: quotes, backslashes and the common control characters get their
// short escapes, any other byte below 0x20 becomes a lowercase \u00XX escape,
// and every remaining byte is copied through unchanged.
std::string json_escape(const std::string& s) {
    std::string escaped;
    escaped.reserve(s.size() + 8);

    for (size_t idx = 0; idx < s.size(); ++idx) {
        const char ch = s[idx];

        // Characters that have a dedicated two-character escape.
        const char* named = nullptr;
        if (ch == '"')       named = "\\\"";
        else if (ch == '\\') named = "\\\\";
        else if (ch == '\n') named = "\\n";
        else if (ch == '\r') named = "\\r";
        else if (ch == '\t') named = "\\t";
        else if (ch == '\b') named = "\\b";
        else if (ch == '\f') named = "\\f";

        if (named != nullptr) {
            escaped += named;
            continue;
        }

        const unsigned char byte = static_cast<unsigned char>(ch);
        if (byte >= 0x20) {
            escaped += ch;
            continue;
        }

        // Remaining control characters: "\uXXXX" is 6 chars + NUL, fits in 8.
        char hex[8];
        std::snprintf(hex, sizeof(hex), "\\u%04x", static_cast<int>(byte));
        escaped += hex;
    }
    return escaped;
}
|
|
|
|
// Maps a ColumnType to the lowercase type name printed in the schema listing
// of the prompt. A value that matches none of the handled enumerators
// yields "?".
const char* col_type_doc(ColumnType t) {
    const char* label = "?";
    switch (t) {
        case ColumnType::String: label = "string"; break;
        case ColumnType::Int:    label = "int";    break;
        case ColumnType::Float:  label = "float";  break;
        case ColumnType::Bool:   label = "bool";   break;
        case ColumnType::Date:   label = "date";   break;
        case ColumnType::Json:   label = "json";   break;
        case ColumnType::Auto:   label = "auto";   break;
    }
    return label;
}
|
|
|
|
std::string build_schema_block(const AskInput& in) {
|
|
std::ostringstream os;
|
|
os << "Available columns (stage 0 input):\n";
|
|
for (size_t i = 0; i < in.col_names.size(); ++i) {
|
|
os << " - " << in.col_names[i] << ": "
|
|
<< col_type_doc(i < in.col_types.size() ? in.col_types[i] : ColumnType::String)
|
|
<< "\n";
|
|
}
|
|
if (!in.joinable_names.empty()) {
|
|
os << "Joinable tables (for join clause):\n";
|
|
for (const auto& n : in.joinable_names) os << " - " << n << "\n";
|
|
}
|
|
return os.str();
|
|
}
|
|
|
|
// Returns the system prompt for the requested output mode: the TQL (Lua
// table) instructions for OutputMode::TQL, and the DuckDB SQL instructions
// for every other mode.
std::string build_system_prompt(OutputMode mode) {
    // Instructions for emitting a TQL definition as a fenced Lua block.
    static const char* const kTqlPrompt =
        "You are a TQL (Table Query Language) expert. Output ONLY a Lua code block. "
        "TQL is a Lua table with shape:\n"
        " return { version=1, display=\"table\"|\"bar\"|\"line\"|...,\n"
        " main_source=\"name\", joins={ {alias,source,on,strategy,fields},... },\n"
        " stages={ {filter={{op,col,value},...}, breakout={...}, aggregation={...}, sort={...} },... },\n"
        " columns={ name = {type=\"int|float|...\", formula=\"[col]+1\"},... }\n"
        " }\n"
        "Stage 0 = Raw (filters + derived + sort, NO breakouts/aggs).\n"
        "Stage 1+ groups (breakouts + aggregations).\n"
        "Breakout granularity: append :year|:month|:week|:day|:hour to col name.\n"
        "Aggregation functions: count|sum|avg|min|max|distinct|stddev|median|p25|p75|p90|p99|percentile.\n"
        "Filter ops: '='|'!='|'<'|'<='|'>'|'>='|'contains'|'!contains'|'starts'|'ends'.\n"
        "Sort: {{dir, col}, ...} where dir = 'asc'|'desc'.\n"
        "Join strategies: 'left'|'inner'|'right'|'full'.\n"
        "Formulas use Lua expression syntax with [col] for column refs.\n"
        "Output format: ```lua\\n...\\n```";

    // Instructions for emitting a DuckDB query as a fenced SQL block.
    static const char* const kSqlPrompt =
        "You are a DuckDB SQL expert. Output ONLY a SQL code block compatible with DuckDB.\n"
        "Use CTEs to chain stages. Use date_trunc('month', col) for granularity.\n"
        "Use quantile_cont(col, p) for percentiles. Use ? for bound params.\n"
        "Joins: LEFT/INNER/RIGHT/FULL OUTER JOIN. String concat: ||. Aggregations: standard SQL.\n"
        "Output format: ```sql\\n...\\n```";

    return mode == OutputMode::TQL ? kTqlPrompt : kSqlPrompt;
}
|
|
|
|
} // anon
|
|
|
|
std::string build_request_body(const AskInput& in) {
|
|
std::string system_msg = build_system_prompt(in.mode);
|
|
std::string schema = build_schema_block(in);
|
|
|
|
std::ostringstream user_msg;
|
|
user_msg << "Question: " << in.question << "\n\n"
|
|
<< schema << "\n";
|
|
if (!in.tql_current.empty()) {
|
|
user_msg << "Current TQL:\n```lua\n" << in.tql_current << "\n```\n";
|
|
}
|
|
|
|
std::string model = in.model.empty() ? "claude-sonnet-4-6" : in.model;
|
|
|
|
std::ostringstream body;
|
|
body << "{"
|
|
<< "\"model\":\"" << json_escape(model) << "\","
|
|
<< "\"max_tokens\":" << in.max_tokens << ","
|
|
<< "\"system\":\"" << json_escape(system_msg) << "\","
|
|
<< "\"messages\":[{"
|
|
<< "\"role\":\"user\","
|
|
<< "\"content\":\"" << json_escape(user_msg.str()) << "\""
|
|
<< "}]"
|
|
<< "}";
|
|
return body.str();
|
|
}
|
|
|
|
// Extracts the contents of the first fenced code block from `raw`.
//
// Lookup order:
//   1. the first fence tagged with `lang` (e.g. "```lua"),
//   2. otherwise the first plain "```" fence, skipping an alphanumeric
//      language tag that directly follows it,
//   3. otherwise (no fence at all) `raw` with surrounding whitespace removed.
//
// For a fenced block, the line ending right after the opening fence ("\n" or
// "\r\n") is dropped, the block runs up to the next "```" (or to the end of
// `raw` when the fence is never closed), and trailing '\n' / '\r' characters
// are removed.
std::string extract_code_block(const std::string& raw, const std::string& lang) {
    const std::string::size_type npos = std::string::npos;
    const char kFence[] = "```";
    const size_t kFenceLen = sizeof(kFence) - 1;

    const std::string tagged_fence = kFence + lang;

    size_t code_start = npos;
    size_t open = raw.find(tagged_fence);
    if (open != npos) {
        code_start = open + tagged_fence.size();
    } else if ((open = raw.find(kFence)) != npos) {
        code_start = open + kFenceLen;
        // Skip an optional language tag such as "python".
        while (code_start < raw.size() &&
               std::isalnum(static_cast<unsigned char>(raw[code_start]))) {
            ++code_start;
        }
    }

    if (code_start == npos) {
        // No fence: return the whole text, trimmed on both sides.
        size_t first = 0;
        while (first < raw.size() &&
               std::isspace(static_cast<unsigned char>(raw[first]))) {
            ++first;
        }
        size_t last = raw.size();
        while (last > first &&
               std::isspace(static_cast<unsigned char>(raw[last - 1]))) {
            --last;
        }
        return raw.substr(first, last - first);
    }

    // Drop the line ending that terminates the opening fence line. CRLF must
    // be consumed as a unit, otherwise the code would start with "\r\n".
    if (code_start + 1 < raw.size() && raw[code_start] == '\r' &&
        raw[code_start + 1] == '\n') {
        code_start += 2;
    } else if (code_start < raw.size() && raw[code_start] == '\n') {
        ++code_start;
    }

    size_t close = raw.find(kFence, code_start);
    if (close == npos) close = raw.size();  // unterminated fence: take the rest

    std::string code = raw.substr(code_start, close - code_start);
    while (!code.empty() && (code.back() == '\n' || code.back() == '\r')) {
        code.pop_back();
    }
    return code;
}
|
|
|
|
// Extracts and unescapes the first string value stored under a "text" key in
// `json`, without a full JSON parse.
//
// The scan looks for the token "text" followed by optional whitespace/colons
// and an opening quote; occurrences that are not followed by a string (such
// as the value in "type":"text",) are skipped. Returns an empty string when
// no such key exists.
//
// Escape handling:
//   - \n \t \r \b \f \" \\ \/ decode to the corresponding character;
//   - \uXXXX decodes to UTF-8, combining a high/low surrogate pair into one
//     code point; an unpaired surrogate becomes U+FFFD;
//   - a \u that is not followed by four hex digits, or any unknown escape,
//     yields the escape letter itself.
std::string parse_response_text(const std::string& json) {
    static const char kKey[] = "\"text\"";
    const size_t kKeyLen = sizeof(kKey) - 1;

    // Parses four hex digits starting at `pos`; -1 when truncated/malformed.
    auto hex4 = [&json](size_t pos) -> int {
        if (pos + 4 > json.size()) return -1;
        int value = 0;
        for (size_t k = 0; k < 4; ++k) {
            const char c = json[pos + k];
            int digit = 0;
            if (c >= '0' && c <= '9')      digit = c - '0';
            else if (c >= 'a' && c <= 'f') digit = c - 'a' + 10;
            else if (c >= 'A' && c <= 'F') digit = c - 'A' + 10;
            else return -1;
            value = value * 16 + digit;
        }
        return value;
    };

    // Appends code point `cp` to `dst` encoded as UTF-8.
    auto append_utf8 = [](std::string& dst, unsigned long cp) {
        if (cp < 0x80) {
            dst += static_cast<char>(cp);
        } else if (cp < 0x800) {
            dst += static_cast<char>(0xC0 | (cp >> 6));
            dst += static_cast<char>(0x80 | (cp & 0x3F));
        } else if (cp < 0x10000) {
            dst += static_cast<char>(0xE0 | (cp >> 12));
            dst += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            dst += static_cast<char>(0x80 | (cp & 0x3F));
        } else {
            dst += static_cast<char>(0xF0 | (cp >> 18));
            dst += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
            dst += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            dst += static_cast<char>(0x80 | (cp & 0x3F));
        }
    };

    for (size_t key = json.find(kKey); key != std::string::npos;
         key = json.find(kKey, key + 1)) {
        // Skip the separator between the key and its value.
        size_t i = key + kKeyLen;
        while (i < json.size() &&
               (json[i] == ' ' || json[i] == ':' || json[i] == '\t' ||
                json[i] == '\n' || json[i] == '\r')) {
            ++i;
        }
        if (i >= json.size() || json[i] != '"') continue;  // not a string value
        ++i;

        std::string out;
        while (i < json.size() && json[i] != '"') {
            if (json[i] != '\\' || i + 1 >= json.size()) {
                out += json[i++];
                continue;
            }

            const char esc = json[i + 1];
            i += 2;  // consume the backslash and the escape letter
            switch (esc) {
                case 'n':  out += '\n'; break;
                case 't':  out += '\t'; break;
                case 'r':  out += '\r'; break;
                case 'b':  out += '\b'; break;
                case 'f':  out += '\f'; break;
                case '"':  out += '"';  break;
                case '\\': out += '\\'; break;
                case '/':  out += '/';  break;
                case 'u': {
                    const int hi = hex4(i);
                    if (hi < 0) {  // malformed: keep the letter, move on
                        out += 'u';
                        break;
                    }
                    i += 4;  // exactly the four hex digits, nothing more

                    unsigned long cp = static_cast<unsigned long>(hi);
                    if (hi >= 0xD800 && hi <= 0xDBFF) {
                        // High surrogate: needs a following \uDC00-\uDFFF.
                        int lo = -1;
                        if (i + 1 < json.size() && json[i] == '\\' &&
                            json[i + 1] == 'u') {
                            lo = hex4(i + 2);
                        }
                        if (lo >= 0xDC00 && lo <= 0xDFFF) {
                            cp = 0x10000UL +
                                 (static_cast<unsigned long>(hi - 0xD800) << 10) +
                                 static_cast<unsigned long>(lo - 0xDC00);
                            i += 6;
                        } else {
                            cp = 0xFFFD;
                        }
                    } else if (hi >= 0xDC00 && hi <= 0xDFFF) {
                        cp = 0xFFFD;  // low surrogate without a high one
                    }
                    append_utf8(out, cp);
                    break;
                }
                default:
                    out += esc;
                    break;
            }
        }
        return out;
    }
    return "";
}
|
|
|
|
namespace {
|
|
|
|
// Resolves the API key; the first non-empty source wins:
//   1. the `provided` argument,
//   2. the FN_LLM_API_KEY environment variable,
//   3. the first output line of `pass anthropic/api-key`.
// Returns an empty string when the command cannot be started or prints
// nothing.
std::string resolve_api_key(const std::string& provided) {
    if (!provided.empty()) return provided;

    if (const char* from_env = std::getenv("FN_LLM_API_KEY")) {
        if (from_env[0] != '\0') return from_env;
    }

    // Fall back to the `pass` password store; stderr is discarded.
    FILE* pipe = popen("pass anthropic/api-key 2>/dev/null | head -n1", "r");
    if (pipe == nullptr) return std::string();

    std::string key;
    char chunk[256];
    while (fgets(chunk, sizeof(chunk), pipe) != nullptr) {
        key.append(chunk);
    }
    pclose(pipe);

    // Strip trailing line terminators (npos + 1 == 0 clears the string).
    key.erase(key.find_last_not_of("\r\n") + 1);
    return key;
}
|
|
|
|
} // anon
|
|
|
|
std::string call_api(const std::string& body, const std::string& api_key,
|
|
std::string& error_out) {
|
|
error_out.clear();
|
|
// Test injection
|
|
const char* mock = std::getenv("FN_LLM_MOCK_RESPONSE");
|
|
if (mock && *mock) return mock;
|
|
|
|
std::string key = resolve_api_key(api_key);
|
|
if (key.empty()) {
|
|
error_out = "no API key (set FN_LLM_API_KEY env, pass param, or `pass anthropic/api-key`)";
|
|
return "";
|
|
}
|
|
const char* endpoint_env = std::getenv("FN_LLM_ENDPOINT");
|
|
std::string endpoint = endpoint_env && *endpoint_env
|
|
? endpoint_env
|
|
: "https://api.anthropic.com/v1/messages";
|
|
|
|
// popen "w+" no portable. Write body a tmp file y leer respuesta de curl
|
|
// por redireccion. Portable Unix/Mingw.
|
|
std::string tmp_in = std::tmpnam(nullptr);
|
|
std::string tmp_out = std::tmpnam(nullptr);
|
|
{
|
|
FILE* f = std::fopen(tmp_in.c_str(), "w");
|
|
if (!f) { error_out = "tmp file write fail"; return ""; }
|
|
std::fwrite(body.data(), 1, body.size(), f);
|
|
std::fclose(f);
|
|
}
|
|
std::string cmd2 = "curl -sS -X POST "
|
|
"-H \"content-type: application/json\" "
|
|
"-H \"anthropic-version: 2023-06-01\" "
|
|
"-H \"x-api-key: " + key + "\" "
|
|
"--data-binary @" + tmp_in + " " + endpoint
|
|
+ " > " + tmp_out + " 2>&1";
|
|
int rc = std::system(cmd2.c_str());
|
|
std::string resp;
|
|
{
|
|
FILE* f = std::fopen(tmp_out.c_str(), "r");
|
|
if (f) {
|
|
char buf[4096];
|
|
size_t n;
|
|
while ((n = std::fread(buf, 1, sizeof(buf), f)) > 0) resp.append(buf, n);
|
|
std::fclose(f);
|
|
}
|
|
}
|
|
std::remove(tmp_in.c_str());
|
|
std::remove(tmp_out.c_str());
|
|
if (rc != 0) {
|
|
error_out = "curl exit " + std::to_string(rc) + ": " + resp;
|
|
return "";
|
|
}
|
|
return resp;
|
|
}
|
|
|
|
// Runs one question end to end: builds the request body from `in`, calls the
// API, extracts the model's text and then the fenced code block for the
// requested mode ("lua" for OutputMode::TQL, "sql" otherwise).
//
// Result fields:
//   error - non-empty when the call failed or the response carried no text
//           (for instance an API error object, which curl reports with exit
//           status 0); raw/code are then not meaningful.
//   raw   - the unescaped text returned by the model.
//   code  - the contents of the first matching code fence in `raw`.
AskResult ask(const AskInput& in, const std::string& api_key) {
    AskResult r;
    const std::string body = build_request_body(in);
    const std::string raw_json = call_api(body, api_key, r.error);
    if (!r.error.empty()) return r;

    const std::string text = parse_response_text(raw_json);
    r.raw = text;
    if (text.empty()) {
        // Surface the start of the payload so API errors are not silently
        // turned into an empty, seemingly successful result.
        const size_t kMaxEcho = 500;
        r.error = "no text content in API response: " + raw_json.substr(0, kMaxEcho);
        return r;
    }

    const std::string lang = (in.mode == OutputMode::TQL) ? "lua" : "sql";
    r.code = extract_code_block(text, lang);
    return r;
}
|
|
|
|
} // namespace llm_anthropic
|