// llm_anthropic.cpp — cliente Anthropic minimal via cURL popen. // Ver issue 0080. #include "llm_anthropic.h" #include #include #include #include #include namespace llm_anthropic { using namespace data_table; namespace { // JSON escape minimal. std::string json_escape(const std::string& s) { std::string o; o.reserve(s.size() + 8); for (char c : s) { switch (c) { case '"': o += "\\\""; break; case '\\': o += "\\\\"; break; case '\n': o += "\\n"; break; case '\r': o += "\\r"; break; case '\t': o += "\\t"; break; case '\b': o += "\\b"; break; case '\f': o += "\\f"; break; default: if ((unsigned char)c < 0x20) { char buf[8]; std::snprintf(buf, sizeof(buf), "\\u%04x", (int)(unsigned char)c); o += buf; } else { o += c; } } } return o; } const char* col_type_doc(ColumnType t) { switch (t) { case ColumnType::String: return "string"; case ColumnType::Int: return "int"; case ColumnType::Float: return "float"; case ColumnType::Bool: return "bool"; case ColumnType::Date: return "date"; case ColumnType::Json: return "json"; case ColumnType::Auto: return "auto"; } return "?"; } std::string build_schema_block(const AskInput& in) { std::ostringstream os; os << "Available columns (stage 0 input):\n"; for (size_t i = 0; i < in.col_names.size(); ++i) { os << " - " << in.col_names[i] << ": " << col_type_doc(i < in.col_types.size() ? in.col_types[i] : ColumnType::String) << "\n"; } if (!in.joinable_names.empty()) { os << "Joinable tables (for join clause):\n"; for (const auto& n : in.joinable_names) os << " - " << n << "\n"; } return os.str(); } std::string build_system_prompt(OutputMode mode) { if (mode == OutputMode::TQL) { return "You are a TQL (Table Query Language) expert. Output ONLY a Lua code block. " "TQL is a Lua table with shape:\n" " return { version=1, display=\"table\"|\"bar\"|\"line\"|...,\n" " main_source=\"name\", joins={ {alias,source,on,strategy,fields},... },\n" " stages={ {filter={{op,col,value},...}, breakout={...}, aggregation={...}, sort={...} },... },\n" " columns={ name = {type=\"int|float|...\", formula=\"[col]+1\"},... }\n" " }\n" "Stage 0 = Raw (filters + derived + sort, NO breakouts/aggs).\n" "Stage 1+ groups (breakouts + aggregations).\n" "Breakout granularity: append :year|:month|:week|:day|:hour to col name.\n" "Aggregation functions: count|sum|avg|min|max|distinct|stddev|median|p25|p75|p90|p99|percentile.\n" "Filter ops: '='|'!='|'<'|'<='|'>'|'>='|'contains'|'!contains'|'starts'|'ends'.\n" "Sort: {{dir, col}, ...} where dir = 'asc'|'desc'.\n" "Join strategies: 'left'|'inner'|'right'|'full'.\n" "Formulas use Lua expression syntax with [col] for column refs.\n" "Output format: ```lua\\n...\\n```"; } return "You are a DuckDB SQL expert. Output ONLY a SQL code block compatible with DuckDB.\n" "Use CTEs to chain stages. Use date_trunc('month', col) for granularity.\n" "Use quantile_cont(col, p) for percentiles. Use ? for bound params.\n" "Joins: LEFT/INNER/RIGHT/FULL OUTER JOIN. String concat: ||. Aggregations: standard SQL.\n" "Output format: ```sql\\n...\\n```"; } } // anon std::string build_request_body(const AskInput& in) { std::string system_msg = build_system_prompt(in.mode); std::string schema = build_schema_block(in); std::ostringstream user_msg; user_msg << "Question: " << in.question << "\n\n" << schema << "\n"; if (!in.tql_current.empty()) { user_msg << "Current TQL:\n```lua\n" << in.tql_current << "\n```\n"; } std::string model = in.model.empty() ? "claude-sonnet-4-6" : in.model; std::ostringstream body; body << "{" << "\"model\":\"" << json_escape(model) << "\"," << "\"max_tokens\":" << in.max_tokens << "," << "\"system\":\"" << json_escape(system_msg) << "\"," << "\"messages\":[{" << "\"role\":\"user\"," << "\"content\":\"" << json_escape(user_msg.str()) << "\"" << "}]" << "}"; return body.str(); } std::string extract_code_block(const std::string& raw, const std::string& lang) { // Buscar ``` primero, sino ``` plain. std::string fence_lang = "```" + lang; auto pos = raw.find(fence_lang); size_t code_start = std::string::npos; if (pos != std::string::npos) { code_start = pos + fence_lang.size(); } else { pos = raw.find("```"); if (pos != std::string::npos) { code_start = pos + 3; // skip optional lang tag while (code_start < raw.size() && raw[code_start] != '\n' && raw[code_start] != '\r' && std::isalnum((unsigned char)raw[code_start])) { ++code_start; } } } if (code_start == std::string::npos) { // No fence — return raw stripped. size_t i = 0; while (i < raw.size() && std::isspace((unsigned char)raw[i])) ++i; size_t j = raw.size(); while (j > i && std::isspace((unsigned char)raw[j-1])) --j; return raw.substr(i, j - i); } // Skip newline tras fence. if (code_start < raw.size() && raw[code_start] == '\n') ++code_start; auto end = raw.find("```", code_start); if (end == std::string::npos) end = raw.size(); std::string code = raw.substr(code_start, end - code_start); // Trim trailing newline. while (!code.empty() && (code.back() == '\n' || code.back() == '\r')) code.pop_back(); return code; } std::string parse_response_text(const std::string& json) { // Buscar pattern: "text":"..." // Simple: primer occurrence de \"text\":\" tras \"type\":\"text\" auto t = json.find("\"text\""); while (t != std::string::npos) { // Skip "text" size_t i = t + 6; // Skip whitespace y : while (i < json.size() && (json[i] == ' ' || json[i] == ':' || json[i] == '\t')) ++i; if (i >= json.size() || json[i] != '"') { t = json.find("\"text\"", t + 1); continue; } ++i; std::string out; while (i < json.size() && json[i] != '"') { if (json[i] == '\\' && i + 1 < json.size()) { char esc = json[i+1]; if (esc == 'n') out += '\n'; else if (esc == 't') out += '\t'; else if (esc == 'r') out += '\r'; else if (esc == '"') out += '"'; else if (esc == '\\') out += '\\'; else if (esc == '/') out += '/'; else if (esc == 'u' && i + 5 < json.size()) { // basic ascii \uXXXX int code = 0; for (int k = 0; k < 4; ++k) { char c = json[i + 2 + k]; int v = (c >= '0' && c <= '9') ? c - '0' : (c >= 'a' && c <= 'f') ? c - 'a' + 10 : (c >= 'A' && c <= 'F') ? c - 'A' + 10 : 0; code = code * 16 + v; } if (code < 128) out += (char)code; else out += '?'; i += 5; } else { out += esc; } i += 2; } else { out += json[i++]; } } return out; } return ""; } namespace { // Lee API key segun prioridad: param > env FN_LLM_API_KEY > pass anthropic/api-key. std::string resolve_api_key(const std::string& provided) { if (!provided.empty()) return provided; const char* env = std::getenv("FN_LLM_API_KEY"); if (env && *env) return env; // pass anthropic/api-key | head -n1 FILE* p = popen("pass anthropic/api-key 2>/dev/null | head -n1", "r"); if (!p) return ""; std::string out; char buf[256]; while (fgets(buf, sizeof(buf), p)) out += buf; pclose(p); while (!out.empty() && (out.back() == '\n' || out.back() == '\r')) out.pop_back(); return out; } } // anon std::string call_api(const std::string& body, const std::string& api_key, std::string& error_out) { error_out.clear(); // Test injection const char* mock = std::getenv("FN_LLM_MOCK_RESPONSE"); if (mock && *mock) return mock; std::string key = resolve_api_key(api_key); if (key.empty()) { error_out = "no API key (set FN_LLM_API_KEY env, pass param, or `pass anthropic/api-key`)"; return ""; } const char* endpoint_env = std::getenv("FN_LLM_ENDPOINT"); std::string endpoint = endpoint_env && *endpoint_env ? endpoint_env : "https://api.anthropic.com/v1/messages"; // popen "w+" no portable. Write body a tmp file y leer respuesta de curl // por redireccion. Portable Unix/Mingw. std::string tmp_in = std::tmpnam(nullptr); std::string tmp_out = std::tmpnam(nullptr); { FILE* f = std::fopen(tmp_in.c_str(), "w"); if (!f) { error_out = "tmp file write fail"; return ""; } std::fwrite(body.data(), 1, body.size(), f); std::fclose(f); } std::string cmd2 = "curl -sS -X POST " "-H \"content-type: application/json\" " "-H \"anthropic-version: 2023-06-01\" " "-H \"x-api-key: " + key + "\" " "--data-binary @" + tmp_in + " " + endpoint + " > " + tmp_out + " 2>&1"; int rc = std::system(cmd2.c_str()); std::string resp; { FILE* f = std::fopen(tmp_out.c_str(), "r"); if (f) { char buf[4096]; size_t n; while ((n = std::fread(buf, 1, sizeof(buf), f)) > 0) resp.append(buf, n); std::fclose(f); } } std::remove(tmp_in.c_str()); std::remove(tmp_out.c_str()); if (rc != 0) { error_out = "curl exit " + std::to_string(rc) + ": " + resp; return ""; } return resp; } AskResult ask(const AskInput& in, const std::string& api_key) { AskResult r; std::string body = build_request_body(in); std::string raw_json = call_api(body, api_key, r.error); if (!r.error.empty()) return r; r.raw = parse_response_text(raw_json); std::string lang = (in.mode == OutputMode::TQL) ? "lua" : "sql"; r.code = extract_code_block(r.raw, lang); return r; } } // namespace llm_anthropic