Files
egutierrez 100aeaa1fc chore: auto-commit (12 archivos)
- playground/tables/CMakeLists.txt
- playground/tables/data_table.cpp
- playground/tables/data_table_logic.cpp
- playground/tables/data_table_logic.h
- playground/tables/self_test.cpp
- playground/tables/tql.cpp
- playground/tables/viz.cpp
- playground/tables/viz.h
- playground/tables/llm_anthropic.cpp
- playground/tables/llm_anthropic.h
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 00:50:35 +02:00

296 lines
11 KiB
C++

// llm_anthropic.cpp — minimal Anthropic client that shells out to curl.
// See issue 0080.
#include "llm_anthropic.h"
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <string>
namespace llm_anthropic {
using namespace data_table;
namespace {
// Escapes `s` so it can be placed between the quotes of a JSON string literal.
// The surrounding quotes are not added. Double quote, backslash and the
// newline/CR/tab/backspace/form-feed controls use their two-character escapes;
// every other byte below 0x20 is written as \u00XX. All remaining bytes are
// copied through unchanged.
std::string json_escape(const std::string& s) {
    std::string escaped;
    escaped.reserve(s.size() + 8);
    for (const char ch : s) {
        const char* shorthand = nullptr;
        if (ch == '"') shorthand = "\\\"";
        else if (ch == '\\') shorthand = "\\\\";
        else if (ch == '\n') shorthand = "\\n";
        else if (ch == '\r') shorthand = "\\r";
        else if (ch == '\t') shorthand = "\\t";
        else if (ch == '\b') shorthand = "\\b";
        else if (ch == '\f') shorthand = "\\f";

        if (shorthand != nullptr) {
            escaped.append(shorthand);
            continue;
        }

        const unsigned char byte = static_cast<unsigned char>(ch);
        if (byte >= 0x20) {
            escaped.push_back(ch);
            continue;
        }

        // Remaining control characters have no short form.
        char hex[8];
        std::snprintf(hex, sizeof(hex), "\\u%04x", static_cast<int>(byte));
        escaped.append(hex);
    }
    return escaped;
}
// Returns the lowercase type name used for a column in the prompt's schema
// listing. A value not matched by any case below yields "?".
const char* col_type_doc(ColumnType t) {
    const char* label = "?";
    switch (t) {
        case ColumnType::String: label = "string"; break;
        case ColumnType::Int:    label = "int";    break;
        case ColumnType::Float:  label = "float";  break;
        case ColumnType::Bool:   label = "bool";   break;
        case ColumnType::Date:   label = "date";   break;
        case ColumnType::Json:   label = "json";   break;
        case ColumnType::Auto:   label = "auto";   break;
    }
    return label;
}
std::string build_schema_block(const AskInput& in) {
std::ostringstream os;
os << "Available columns (stage 0 input):\n";
for (size_t i = 0; i < in.col_names.size(); ++i) {
os << " - " << in.col_names[i] << ": "
<< col_type_doc(i < in.col_types.size() ? in.col_types[i] : ColumnType::String)
<< "\n";
}
if (!in.joinable_names.empty()) {
os << "Joinable tables (for join clause):\n";
for (const auto& n : in.joinable_names) os << " - " << n << "\n";
}
return os.str();
}
// Returns the system prompt for the requested output mode: the TQL prompt for
// OutputMode::TQL, the DuckDB SQL prompt for every other mode.
std::string build_system_prompt(OutputMode mode) {
    // Prompt asking for a Lua/TQL answer.
    static const char* const kTqlPrompt =
        "You are a TQL (Table Query Language) expert. Output ONLY a Lua code block. "
        "TQL is a Lua table with shape:\n"
        " return { version=1, display=\"table\"|\"bar\"|\"line\"|...,\n"
        " main_source=\"name\", joins={ {alias,source,on,strategy,fields},... },\n"
        " stages={ {filter={{op,col,value},...}, breakout={...}, aggregation={...}, sort={...} },... },\n"
        " columns={ name = {type=\"int|float|...\", formula=\"[col]+1\"},... }\n"
        " }\n"
        "Stage 0 = Raw (filters + derived + sort, NO breakouts/aggs).\n"
        "Stage 1+ groups (breakouts + aggregations).\n"
        "Breakout granularity: append :year|:month|:week|:day|:hour to col name.\n"
        "Aggregation functions: count|sum|avg|min|max|distinct|stddev|median|p25|p75|p90|p99|percentile.\n"
        "Filter ops: '='|'!='|'<'|'<='|'>'|'>='|'contains'|'!contains'|'starts'|'ends'.\n"
        "Sort: {{dir, col}, ...} where dir = 'asc'|'desc'.\n"
        "Join strategies: 'left'|'inner'|'right'|'full'.\n"
        "Formulas use Lua expression syntax with [col] for column refs.\n"
        "Output format: ```lua\\n...\\n```";

    // Prompt asking for a DuckDB SQL answer.
    static const char* const kSqlPrompt =
        "You are a DuckDB SQL expert. Output ONLY a SQL code block compatible with DuckDB.\n"
        "Use CTEs to chain stages. Use date_trunc('month', col) for granularity.\n"
        "Use quantile_cont(col, p) for percentiles. Use ? for bound params.\n"
        "Joins: LEFT/INNER/RIGHT/FULL OUTER JOIN. String concat: ||. Aggregations: standard SQL.\n"
        "Output format: ```sql\\n...\\n```";

    return std::string(mode == OutputMode::TQL ? kTqlPrompt : kSqlPrompt);
}
} // anon
// Serializes the JSON request body for one question.
//
// The object carries "model" (in.model, or "claude-sonnet-4-6" when that is
// empty), "max_tokens", "system" (the mode-specific system prompt) and a
// single user message. The user message holds the question, the schema block
// and, when in.tql_current is non-empty, the current TQL in a lua fence.
// String values go through json_escape.
std::string build_request_body(const AskInput& in) {
    const std::string model_name = in.model.empty() ? "claude-sonnet-4-6" : in.model;

    // Assemble the user-visible prompt text first.
    std::ostringstream prompt;
    prompt << "Question: " << in.question << "\n\n";
    prompt << build_schema_block(in) << "\n";
    if (!in.tql_current.empty()) {
        prompt << "Current TQL:\n```lua\n" << in.tql_current << "\n```\n";
    }

    // Then emit the JSON object field by field.
    std::ostringstream json;
    json << "{\"model\":\"" << json_escape(model_name) << "\",";
    json << "\"max_tokens\":" << in.max_tokens << ",";
    json << "\"system\":\"" << json_escape(build_system_prompt(in.mode)) << "\",";
    json << "\"messages\":[{\"role\":\"user\",\"content\":\""
         << json_escape(prompt.str()) << "\"}]}";
    return json.str();
}
// Extracts the contents of the first fenced code block in `raw`.
//
// Lookup order:
//   1. A fence tagged exactly ```<lang> (the tag must not continue with more
//      alphanumerics, so "lua" does not match ```luau). Skipped when `lang`
//      is empty.
//   2. The first ``` fence of any kind; its alphanumeric tag is skipped.
//   3. No fence at all: `raw` with surrounding whitespace trimmed.
//
// After the opening fence, optional blanks plus one LF or CRLF line break are
// consumed; if the code starts on the fence line itself it is kept as is.
// The block ends at the next ``` or, if the fence is never closed, at the end
// of `raw`. Trailing CR/LF characters are removed from the result.
std::string extract_code_block(const std::string& raw, const std::string& lang) {
    const std::string fence = "```";
    const auto is_tag_char = [](char c) {
        return std::isalnum(static_cast<unsigned char>(c)) != 0;
    };

    size_t code_start = std::string::npos;

    // 1) Fence carrying the requested language tag, with a tag boundary.
    if (!lang.empty()) {
        const std::string fence_lang = fence + lang;
        for (size_t pos = raw.find(fence_lang); pos != std::string::npos;
             pos = raw.find(fence_lang, pos + 1)) {
            const size_t after = pos + fence_lang.size();
            if (after >= raw.size() || !is_tag_char(raw[after])) {
                code_start = after;
                break;
            }
        }
    }

    // 2) Any fence; skip whatever language tag it carries.
    if (code_start == std::string::npos) {
        const size_t pos = raw.find(fence);
        if (pos != std::string::npos) {
            code_start = pos + fence.size();
            while (code_start < raw.size() && is_tag_char(raw[code_start])) {
                ++code_start;
            }
        }
    }

    // 3) No fence: return the reply itself, trimmed.
    if (code_start == std::string::npos) {
        size_t first = 0;
        while (first < raw.size() && std::isspace(static_cast<unsigned char>(raw[first]))) {
            ++first;
        }
        size_t last = raw.size();
        while (last > first && std::isspace(static_cast<unsigned char>(raw[last - 1]))) {
            --last;
        }
        return raw.substr(first, last - first);
    }

    // Consume the line break that terminates the fence line (LF or CRLF,
    // optionally preceded by blanks). Leave code_start alone when the code
    // begins on the fence line.
    size_t probe = code_start;
    while (probe < raw.size() && (raw[probe] == ' ' || raw[probe] == '\t')) ++probe;
    if (probe < raw.size() && raw[probe] == '\r') ++probe;
    if (probe < raw.size() && raw[probe] == '\n') code_start = probe + 1;

    const size_t closing = raw.find(fence, code_start);
    std::string code = (closing == std::string::npos)
        ? raw.substr(code_start)
        : raw.substr(code_start, closing - code_start);

    // Drop the line break(s) sitting in front of the closing fence.
    while (!code.empty() && (code.back() == '\n' || code.back() == '\r')) {
        code.pop_back();
    }
    return code;
}
// Appends the UTF-8 encoding of code point `cp` (expected <= 0x10FFFF) to `out`.
static void append_utf8(std::string& out, unsigned long cp) {
    if (cp < 0x80) {
        out += static_cast<char>(cp);
    } else if (cp < 0x800) {
        out += static_cast<char>(0xC0 | (cp >> 6));
        out += static_cast<char>(0x80 | (cp & 0x3F));
    } else if (cp < 0x10000) {
        out += static_cast<char>(0xE0 | (cp >> 12));
        out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
        out += static_cast<char>(0x80 | (cp & 0x3F));
    } else {
        out += static_cast<char>(0xF0 | (cp >> 18));
        out += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
        out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
        out += static_cast<char>(0x80 | (cp & 0x3F));
    }
}

// Parses the four hex digits at s[pos..pos+3] into `value`.
// Returns false (leaving `value` untouched) if fewer than four characters
// remain or any of them is not a hex digit.
static bool read_hex4(const std::string& s, std::size_t pos, unsigned& value) {
    if (pos > s.size() || s.size() - pos < 4) return false;
    unsigned acc = 0;
    for (std::size_t k = 0; k < 4; ++k) {
        const char c = s[pos + k];
        unsigned digit = 0;
        if (c >= '0' && c <= '9') digit = static_cast<unsigned>(c - '0');
        else if (c >= 'a' && c <= 'f') digit = static_cast<unsigned>(c - 'a') + 10;
        else if (c >= 'A' && c <= 'F') digit = static_cast<unsigned>(c - 'A') + 10;
        else return false;
        acc = acc * 16 + digit;
    }
    value = acc;
    return true;
}

// Pulls the first string value stored under a "text" key out of `json`.
//
// This is a targeted scan, not a JSON parser: it looks for the token "text"
// followed by optional spaces/tabs/colons and an opening quote, then decodes
// the string up to the next unescaped quote (or the end of input). A "text"
// token that is not followed by a string (e.g. the value in "type":"text") is
// skipped. Returns "" when no such key is found.
//
// Escapes handled: \n \t \r \b \f \" \\ \/ and \uXXXX. \uXXXX is emitted as
// UTF-8 and surrogate pairs are combined; a lone surrogate becomes '?'. Any
// other escape, including a malformed or truncated \u, yields the character
// after the backslash.
std::string parse_response_text(const std::string& json) {
    static const char kKey[] = "\"text\"";
    const std::size_t key_len = sizeof(kKey) - 1;

    for (std::size_t t = json.find(kKey); t != std::string::npos;
         t = json.find(kKey, t + 1)) {
        std::size_t i = t + key_len;
        // Skip the separator between key and value.
        while (i < json.size() && (json[i] == ' ' || json[i] == ':' || json[i] == '\t')) ++i;
        if (i >= json.size() || json[i] != '"') continue;  // not a string value
        ++i;

        std::string out;
        while (i < json.size() && json[i] != '"') {
            if (json[i] != '\\' || i + 1 >= json.size()) {
                out += json[i++];
                continue;
            }
            const char esc = json[i + 1];
            i += 2;  // past the backslash and the escape letter
            switch (esc) {
                case 'n': out += '\n'; break;
                case 't': out += '\t'; break;
                case 'r': out += '\r'; break;
                case 'b': out += '\b'; break;
                case 'f': out += '\f'; break;
                case '"': out += '"'; break;
                case '\\': out += '\\'; break;
                case '/': out += '/'; break;
                case 'u': {
                    unsigned unit = 0;
                    if (!read_hex4(json, i, unit)) {
                        out += esc;  // malformed/truncated: keep the letter
                        break;
                    }
                    i += 4;  // exactly the four hex digits
                    if (unit >= 0xD800 && unit <= 0xDBFF) {
                        // High surrogate: needs a following \uDC00-\uDFFF.
                        unsigned low = 0;
                        if (i + 1 < json.size() && json[i] == '\\' && json[i + 1] == 'u' &&
                            read_hex4(json, i + 2, low) && low >= 0xDC00 && low <= 0xDFFF) {
                            i += 6;
                            append_utf8(out, 0x10000UL +
                                                 ((static_cast<unsigned long>(unit) - 0xD800UL) << 10) +
                                                 (static_cast<unsigned long>(low) - 0xDC00UL));
                        } else {
                            out += '?';
                        }
                    } else if (unit >= 0xDC00 && unit <= 0xDFFF) {
                        out += '?';  // low surrogate without a preceding high one
                    } else {
                        append_utf8(out, unit);
                    }
                    break;
                }
                default:
                    out += esc;
                    break;
            }
        }
        return out;
    }
    return "";
}
namespace {
// Resolves the API key to use, in priority order:
//   1. `provided`, when non-empty;
//   2. the FN_LLM_API_KEY environment variable, when set and non-empty;
//   3. the first line printed by `pass anthropic/api-key`.
// Returns an empty string when the pipe cannot be opened or prints nothing.
std::string resolve_api_key(const std::string& provided) {
    if (!provided.empty()) {
        return provided;
    }

    const char* from_env = std::getenv("FN_LLM_API_KEY");
    if (from_env != nullptr && from_env[0] != '\0') {
        return std::string(from_env);
    }

    // Last resort: ask the `pass` password manager (stderr discarded).
    FILE* pipe = popen("pass anthropic/api-key 2>/dev/null | head -n1", "r");
    if (pipe == nullptr) {
        return std::string();
    }
    std::string key;
    char chunk[256];
    while (fgets(chunk, sizeof(chunk), pipe) != nullptr) {
        key.append(chunk);
    }
    pclose(pipe);

    // Strip the trailing line break(s).
    const std::string::size_type last_kept = key.find_last_not_of("\r\n");
    key.erase(last_kept == std::string::npos ? 0 : last_kept + 1);
    return key;
}
} // anon
// POSTs `body` to the messages endpoint by running curl, and returns whatever
// curl wrote (stdout and stderr are captured together).
//
// Parameters:
//   body      - request payload, sent verbatim via --data-binary.
//   api_key   - explicit key; when empty, resolve_api_key() supplies one.
//   error_out - cleared on entry; set to a message on failure.
//
// Returns the captured output on success, "" on failure (with error_out set).
//
// Environment:
//   FN_LLM_MOCK_RESPONSE - when non-empty, returned as-is without running
//                          anything (test injection).
//   FN_LLM_ENDPOINT      - overrides the default endpoint URL.
//
// NOTE(review): the key is passed on curl's command line, so it is visible to
// other local users in the process list while the request runs. The reported
// "curl exit" number is the raw std::system() status, which on POSIX is a
// wait status rather than curl's own exit code.
std::string call_api(const std::string& body, const std::string& api_key,
                     std::string& error_out) {
    error_out.clear();

    // Test injection.
    const char* mock = std::getenv("FN_LLM_MOCK_RESPONSE");
    if (mock && *mock) return mock;

    const std::string key = resolve_api_key(api_key);
    if (key.empty()) {
        error_out = "no API key (set FN_LLM_API_KEY env, pass param, or `pass anthropic/api-key`)";
        return "";
    }

    const char* endpoint_env = std::getenv("FN_LLM_ENDPOINT");
    const std::string endpoint = (endpoint_env && *endpoint_env)
        ? endpoint_env
        : "https://api.anthropic.com/v1/messages";

    // Key and endpoint are spliced into double-quoted shell words below.
    // Refuse anything that could close the quotes or trigger expansion instead
    // of handing it to the shell.
    static const char kShellUnsafe[] = "\"\\$`\r\n";
    if (key.find_first_of(kShellUnsafe) != std::string::npos) {
        error_out = "API key contains characters that are unsafe in a shell command";
        return "";
    }
    if (endpoint.find_first_of(kShellUnsafe) != std::string::npos) {
        error_out = "endpoint contains characters that are unsafe in a shell command";
        return "";
    }

    // popen "w+" is not portable, so the body goes through a temp file and
    // curl's output is redirected to a second one. Works on Unix and MinGW.
    // std::tmpnam may return null when no name can be generated.
    const char* in_name = std::tmpnam(nullptr);
    if (!in_name) { error_out = "tmp file name fail"; return ""; }
    const std::string tmp_in = in_name;  // copy before the next tmpnam call reuses the buffer
    const char* out_name = std::tmpnam(nullptr);
    if (!out_name) { error_out = "tmp file name fail"; return ""; }
    const std::string tmp_out = out_name;

    {
        // Binary mode so the payload reaches curl byte-for-byte.
        FILE* f = std::fopen(tmp_in.c_str(), "wb");
        if (!f) { error_out = "tmp file write fail"; return ""; }
        const std::size_t written = std::fwrite(body.data(), 1, body.size(), f);
        const bool closed_ok = (std::fclose(f) == 0);
        if (written != body.size() || !closed_ok) {
            std::remove(tmp_in.c_str());
            error_out = "tmp file write fail";
            return "";
        }
    }

    const std::string cmd = "curl -sS -X POST "
        "-H \"content-type: application/json\" "
        "-H \"anthropic-version: 2023-06-01\" "
        "-H \"x-api-key: " + key + "\" "
        "--data-binary \"@" + tmp_in + "\" "
        "\"" + endpoint + "\""
        " > \"" + tmp_out + "\" 2>&1";
    const int rc = std::system(cmd.c_str());

    std::string resp;
    {
        FILE* f = std::fopen(tmp_out.c_str(), "rb");
        if (f) {
            char buf[4096];
            std::size_t n;
            while ((n = std::fread(buf, 1, sizeof(buf), f)) > 0) resp.append(buf, n);
            std::fclose(f);
        }
    }
    std::remove(tmp_in.c_str());
    std::remove(tmp_out.c_str());

    if (rc != 0) {
        error_out = "curl exit " + std::to_string(rc) + ": " + resp;
        return "";
    }
    return resp;
}
// Runs one question end to end: builds the request body, calls the API,
// extracts the reply text and then the fenced code from it.
//
// On return:
//   r.error - non-empty when the call failed or the response carried no text.
//   r.raw   - the decoded text of the reply.
//   r.code  - the contents of the lua (TQL mode) or sql (other modes) fence.
//
// curl is run without a fail-on-HTTP-error flag, so an API error body arrives
// here as a normal response. Such a body has no "text" string; without the
// check below the caller would get an empty result and no explanation.
AskResult ask(const AskInput& in, const std::string& api_key) {
    AskResult r;
    const std::string body = build_request_body(in);
    const std::string raw_json = call_api(body, api_key, r.error);
    if (!r.error.empty()) return r;

    r.raw = parse_response_text(raw_json);
    if (r.raw.empty()) {
        // Surface the start of the response so the caller can see why.
        r.error = "no text content in API response: " + raw_json.substr(0, 512);
        return r;
    }

    const std::string lang = (in.mode == OutputMode::TQL) ? "lua" : "sql";
    r.code = extract_code_block(r.raw, lang);
    return r;
}
} // namespace llm_anthropic