#include "data_http.h"
#include "http_client.h"
#include "vendor/nlohmann/json.hpp"

#include <cstdio>
#include <cstdlib>
#include <string>
#include <utility>
#include <vector>

using json = nlohmann::json;

namespace data_factory {

// Splits `url` into `host` and `port`.
//
// An optional "scheme://" prefix is skipped and anything from the first
// '/', '?' or '#' onwards is ignored, so "http://box:8080/base/" yields
// host "box" and port 8080. When no ":port" suffix is present the port
// defaults to 80 regardless of the scheme.
//
// Returns true only when the host is non-empty and the port is a decimal
// number in the range 1..65535.
static bool parse_url(const std::string& url, std::string& host, int& port) {
    const auto scheme_end = url.find("://");
    std::string authority =
        (scheme_end != std::string::npos) ? url.substr(scheme_end + 3) : url;

    // Drop path / query / fragment so they never end up inside the host name.
    const auto tail = authority.find_first_of("/?#");
    if (tail != std::string::npos) authority.erase(tail);

    const auto colon = authority.find(':');
    if (colon == std::string::npos) {
        host = authority;
        port = 80;
    } else {
        host = authority.substr(0, colon);
        const std::string port_text = authority.substr(colon + 1);
        // strtol (unlike atoi) has defined behaviour on overflow and reports
        // where parsing stopped, so trailing garbage can be rejected.
        char* end = nullptr;
        const long value = std::strtol(port_text.c_str(), &end, 10);
        const bool fully_parsed = (end != port_text.c_str()) && (*end == '\0');
        port = (fully_parsed && value > 0 && value <= 65535)
                   ? static_cast<int>(value)
                   : 0;
    }
    return !host.empty() && port > 0;
}

// Percent-encodes every byte of `value` that is not an RFC 3986 "unreserved"
// character (ASCII letters, digits, '-', '_', '.', '~'), so the result can be
// embedded in a path segment or query value without changing the structure
// of the request target.
static std::string url_encode(const std::string& value) {
    static const char kHex[] = "0123456789ABCDEF";
    std::string out;
    out.reserve(value.size());
    for (const unsigned char c : value) {
        const bool unreserved = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
                                (c >= '0' && c <= '9') || c == '-' || c == '_' ||
                                c == '.' || c == '~';
        if (unreserved) {
            out.push_back(static_cast<char>(c));
        } else {
            out.push_back('%');
            out.push_back(kHex[c >> 4]);
            out.push_back(kHex[c & 0x0F]);
        }
    }
    return out;
}

// Returns j[key] as a string. Missing or null -> "". Non-string values are
// returned as their serialized JSON text.
static std::string get_str(const json& j, const char* key) {
    if (!j.contains(key) || j[key].is_null()) return "";
    if (j[key].is_string()) return j[key].get<std::string>();
    return j[key].dump();
}

// Returns j[key] as a 64-bit integer, or 0 when it is missing, null or not a
// number.
static long long get_int64(const json& j, const char* key) {
    if (!j.contains(key) || j[key].is_null()) return 0;
    if (j[key].is_number()) return j[key].get<long long>();
    return 0;
}

// Returns j[key] as a bool. JSON booleans are used as-is, numbers are true
// when non-zero, and anything else (including missing / null) yields `def`.
static bool get_bool(const json& j, const char* key, bool def) {
    if (!j.contains(key) || j[key].is_null()) return def;
    if (j[key].is_boolean()) return j[key].get<bool>();
    if (j[key].is_number()) return j[key].get<double>() != 0;
    return def;
}

// Returns the string elements of the array j[key]; non-string elements are
// skipped. Yields an empty vector when the key is missing or not an array.
static std::vector<std::string> get_str_array(const json& j, const char* key) {
    std::vector<std::string> out;
    if (!j.contains(key) || !j[key].is_array()) return out;
    for (const auto& v : j[key]) {
        if (v.is_string()) out.push_back(v.get<std::string>());
    }
    return out;
}

// tags can be stored as csv string or array. Tolerate both.
//
// Lookup order: "tags" as an array, then "tags_csv", then "tags" as a scalar.
// CSV input is split on ',' and empty tokens are dropped; tokens are not
// trimmed.
static std::vector<std::string> get_tags(const json& j) {
    if (j.contains("tags") && j["tags"].is_array()) {
        return get_str_array(j, "tags");
    }

    std::vector<std::string> out;
    std::string csv = get_str(j, "tags_csv");
    if (csv.empty()) csv = get_str(j, "tags");
    if (csv.empty()) return out;

    std::size_t start = 0;
    while (start <= csv.size()) {
        std::size_t comma = csv.find(',', start);
        if (comma == std::string::npos) comma = csv.size();
        if (comma > start) out.push_back(csv.substr(start, comma - start));
        start = comma + 1;
    }
    return out;
}

// Fills `n` from one JSON node record. Missing fields become empty strings;
// "enabled" defaults to true.
static void parse_node(const json& j, Node& n) {
    n.id = get_str(j, "id");
    n.kind = get_str(j, "kind");
    n.name = get_str(j, "name");
    n.function_id = get_str(j, "function_id");
    n.description = get_str(j, "description");
    n.schedule_cron = get_str(j, "schedule_cron");
    n.enabled = get_bool(j, "enabled", true);
    n.tags = get_tags(j);
    n.created_at = get_str(j, "created_at");
    n.updated_at = get_str(j, "updated_at");
}

// Fills `r` from one JSON run record. Missing strings become "", missing
// counters become 0.
static void parse_run(const json& j, Run& r) {
    r.id = get_str(j, "id");
    r.node_id = get_str(j, "node_id");
    r.started_at = get_str(j, "started_at");
    r.finished_at = get_str(j, "finished_at");
    r.status = get_str(j, "status");
    r.rows_in = get_int64(j, "rows_in");
    r.rows_out = get_int64(j, "rows_out");
    r.kb_in = get_int64(j, "kb_in");
    r.kb_out = get_int64(j, "kb_out");
    r.duration_ms = get_int64(j, "duration_ms");
    r.trigger = get_str(j, "trigger");
    r.error = get_str(j, "error");
}

// Fills `d` from one JSON database record. Missing strings become "", missing
// counters become 0.
static void parse_db(const json& j, DatabaseInfo& d) {
    d.id = get_str(j, "id");
    d.kind = get_str(j, "kind");
    d.label = get_str(j, "label");
    d.uri = get_str(j, "uri");
    d.description = get_str(j, "description");
    d.table_count = get_int64(j, "table_count");
    d.size_bytes = get_int64(j, "size_bytes");
    d.last_seen_at = get_str(j, "last_seen_at");
}

// Issues GET `path` against the server named by `api_url` and parses the body
// as JSON into `out`.
//
// Returns false when the URL cannot be parsed or the response is not ok(); in
// the latter case "[df_http] <op> failed: status=<n>" is written to stderr.
// A body that is not valid JSON still returns true: `out` is then a discarded
// value, which fails the callers' is_object() checks.
static bool fetch_json(const std::string& api_url, const std::string& path,
                       const std::string& op, json& out) {
    std::string host;
    int port = 0;
    if (!parse_url(api_url, host, port)) return false;

    HttpClient cli(host, port);
    auto res = cli.get(path);
    if (!res.ok()) {
        std::fprintf(stderr, "[df_http] %s failed: status=%d\n", op.c_str(), res.status);
        return false;
    }
    // allow_exceptions=false: malformed input yields a discarded value
    // instead of throwing.
    out = json::parse(res.body, nullptr, false);
    return true;
}

// Fetches `path` and converts the array stored under `key` of the response
// object into `out` using `parse_item`.
//
// `out` is only touched (cleared and refilled) once the response has been
// validated; on any failure it is left unchanged and false is returned.
template <typename T>
static bool fetch_list(const std::string& api_url, const std::string& path,
                       const char* op, const char* key,
                       void (*parse_item)(const json&, T&), std::vector<T>& out) {
    json doc;
    if (!fetch_json(api_url, path, op, doc)) return false;

    const json& root = doc;
    if (!root.is_object() || !root.contains(key) || !root[key].is_array()) {
        return false;
    }

    const json& items = root[key];
    out.clear();
    out.reserve(items.size());
    for (const auto& item : items) {
        T parsed;
        parse_item(item, parsed);
        out.push_back(std::move(parsed));
    }
    return true;
}

// Lists nodes via GET /api/datafactory/nodes, optionally filtered by `kind`
// (sent percent-encoded as the "kind" query parameter when non-empty).
// Returns true and replaces the contents of `out` on success; returns false
// and leaves `out` untouched otherwise.
bool list_nodes_http(const std::string& api_url, const std::string& kind,
                     std::vector<Node>& out) {
    std::string path = "/api/datafactory/nodes";
    if (!kind.empty()) path += "?kind=" + url_encode(kind);
    return fetch_list(api_url, path, "list_nodes", "nodes", parse_node, out);
}

// Lists runs via GET /api/datafactory/runs?limit=<limit>, optionally filtered
// by `node_id` (sent percent-encoded as the "node_id" query parameter when
// non-empty). Returns true and replaces the contents of `out` on success;
// returns false and leaves `out` untouched otherwise.
bool list_runs_http(const std::string& api_url, const std::string& node_id, int limit,
                    std::vector<Run>& out) {
    std::string path = "/api/datafactory/runs?limit=" + std::to_string(limit);
    if (!node_id.empty()) path += "&node_id=" + url_encode(node_id);
    return fetch_list(api_url, path, "list_runs", "runs", parse_run, out);
}

// Lists databases via GET /api/datafactory/databases. Returns true and
// replaces the contents of `out` on success; returns false and leaves `out`
// untouched otherwise.
bool list_databases_http(const std::string& api_url, std::vector<DatabaseInfo>& out) {
    return fetch_list(api_url, "/api/datafactory/databases", "list_databases",
                      "databases", parse_db, out);
}

// Fetches one function description via GET /api/functions/<function_id>
// (the id is percent-encoded into the path). Returns false when the URL is
// invalid, `function_id` is empty, the request fails, or the response body is
// not a JSON object; `out` is only written on success.
bool get_function_http(const std::string& api_url, const std::string& function_id,
                       FnInfo& out) {
    if (function_id.empty()) return false;

    json doc;
    if (!fetch_json(api_url, "/api/functions/" + url_encode(function_id),
                    "get_function(" + function_id + ")", doc)) {
        return false;
    }

    const json& j = doc;
    if (!j.is_object()) return false;

    out.id = get_str(j, "id");
    out.name = get_str(j, "name");
    out.description = get_str(j, "description");
    out.signature = get_str(j, "signature");
    out.purity = get_str(j, "purity");
    out.domain = get_str(j, "domain");
    out.lang = get_str(j, "lang");
    out.uses_functions = get_str_array(j, "uses_functions");
    out.uses_types = get_str_array(j, "uses_types");
    return true;
}

}  // namespace data_factory