// Tests for data_table::join_tables (cpp/functions/core/join_tables). // Pure multi-key hash join — no ImGui, no I/O. #define CATCH_CONFIG_MAIN #include "catch_amalgamated.hpp" #include "core/join_tables.h" using namespace data_table; // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- // Build a TableInput from a flat list of string literals (row-major). // Ownership: the TableInput holds its own cells vector. struct TableData { std::vector header_strs; std::vector types; std::vector cell_ptrs; std::vector cell_strs; TableInput ti; void build(const std::string& name, std::initializer_list headers, std::initializer_list cells_flat) { header_strs = std::vector(headers); cell_strs = std::vector(cells_flat); types.assign(header_strs.size(), ColumnType::Auto); cell_ptrs.clear(); for (auto& s : cell_strs) cell_ptrs.push_back(s.c_str()); ti.name = name; ti.headers = header_strs; ti.types = types; ti.cells = cell_ptrs.data(); ti.cols = (int)header_strs.size(); ti.rows = cell_ptrs.empty() ? 0 : (int)(cell_strs.size() / header_strs.size()); } }; static Join make_join(JoinStrategy strat, std::initializer_list> on, const std::string& alias = "", std::initializer_list fields = {}) { Join j; j.strategy = strat; j.alias = alias; j.on = on; j.fields = fields; return j; } // --------------------------------------------------------------------------- // inner join basico // --------------------------------------------------------------------------- TEST_CASE("inner join basico") { // left: id, name std::vector lhdr = {"id", "name"}; std::vector ltyp = {ColumnType::Auto, ColumnType::Auto}; std::vector lraw = {"1","Alice", "2","Bob", "3","Carol"}; std::vector lptr; for (auto& s : lraw) lptr.push_back(s.c_str()); TableData right; right.build("orders", {"user_id","amount"}, {"1","100", "2","200", "2","150"}); Join jn = make_join(JoinStrategy::Inner, {{"id","user_id"}}); StageOutput res = join_tables(lptr.data(), 3, 2, lhdr, ltyp, right.ti, jn); // Alice->100, Bob->200, Bob->150. Carol has no match. REQUIRE(res.rows == 3); REQUIRE(res.cols == 4); // Check headers: id, name, user_id, amount REQUIRE(res.headers[0] == "id"); REQUIRE(res.headers[1] == "name"); REQUIRE(res.headers[2] == "user_id"); REQUIRE(res.headers[3] == "amount"); } // --------------------------------------------------------------------------- // left join con orphans // --------------------------------------------------------------------------- TEST_CASE("left join con orphans") { std::vector lhdr = {"id", "name"}; std::vector ltyp = {ColumnType::Auto, ColumnType::Auto}; std::vector lraw = {"1","Alice", "2","Bob", "3","Carol"}; std::vector lptr; for (auto& s : lraw) lptr.push_back(s.c_str()); TableData right; right.build("orders", {"user_id","amount"}, {"1","100"}); Join jn = make_join(JoinStrategy::Left, {{"id","user_id"}}); StageOutput res = join_tables(lptr.data(), 3, 2, lhdr, ltyp, right.ti, jn); // Alice matches, Bob and Carol are left orphans -> 3 rows total. REQUIRE(res.rows == 3); // Verify the unmatched rows have empty right cells. // Row 0: Alice, amount=100 REQUIRE(std::string(res.cells[0 * 4 + 3]) == "100"); // Row 1: Bob, amount="" (orphan) REQUIRE(std::string(res.cells[1 * 4 + 3]) == ""); // Row 2: Carol, amount="" (orphan) REQUIRE(std::string(res.cells[2 * 4 + 3]) == ""); } // --------------------------------------------------------------------------- // right join con orphans // --------------------------------------------------------------------------- TEST_CASE("right join con orphans") { std::vector lhdr = {"id", "name"}; std::vector ltyp = {ColumnType::Auto, ColumnType::Auto}; // Only Alice present on left. std::vector lraw = {"1","Alice"}; std::vector lptr; for (auto& s : lraw) lptr.push_back(s.c_str()); TableData right; // user_id 1 and 2 on right; user_id 2 has no match on left. right.build("orders", {"user_id","amount"}, {"1","100", "2","200"}); Join jn = make_join(JoinStrategy::Right, {{"id","user_id"}}); StageOutput res = join_tables(lptr.data(), 1, 2, lhdr, ltyp, right.ti, jn); // Row 0: Alice+100 (matched), Row 1: ""+"2"+"200" (right orphan). REQUIRE(res.rows == 2); // Row 0 has Alice's name. REQUIRE(std::string(res.cells[0 * 4 + 1]) == "Alice"); // Row 1 left side is empty. REQUIRE(std::string(res.cells[1 * 4 + 0]) == ""); REQUIRE(std::string(res.cells[1 * 4 + 1]) == ""); // Row 1 right amount is 200. REQUIRE(std::string(res.cells[1 * 4 + 3]) == "200"); } // --------------------------------------------------------------------------- // full outer join // --------------------------------------------------------------------------- TEST_CASE("full outer join") { std::vector lhdr = {"id","val"}; std::vector ltyp = {ColumnType::Auto, ColumnType::Auto}; std::vector lraw = {"A","1", "B","2"}; std::vector lptr; for (auto& s : lraw) lptr.push_back(s.c_str()); TableData right; // B and C on right; A has no match on right. right.build("r", {"key","score"}, {"B","10", "C","30"}); Join jn = make_join(JoinStrategy::Full, {{"id","key"}}); StageOutput res = join_tables(lptr.data(), 2, 2, lhdr, ltyp, right.ti, jn); // A (left orphan), B (matched), C (right orphan) -> 3 rows. REQUIRE(res.rows == 3); } // --------------------------------------------------------------------------- // multi-key join (2 keys) // --------------------------------------------------------------------------- TEST_CASE("multi-key join con 2 keys") { // left: dept, year, budget std::vector lhdr = {"dept","year","budget"}; std::vector ltyp(3, ColumnType::Auto); std::vector lraw = { "eng","2024","100", "eng","2025","110", "hr", "2024","50" }; std::vector lptr; for (auto& s : lraw) lptr.push_back(s.c_str()); TableData right; // right: dept, year, headcount right.build("headcount", {"dept","year","headcount"}, { "eng","2024","20", "hr", "2024","5" // eng-2025 missing on right }); Join jn = make_join(JoinStrategy::Inner, {{"dept","dept"},{"year","year"}}); StageOutput res = join_tables(lptr.data(), 3, 3, lhdr, ltyp, right.ti, jn); // eng-2024 matches, hr-2024 matches, eng-2025 has no match -> 2 rows. REQUIRE(res.rows == 2); } // --------------------------------------------------------------------------- // key con duplicados (producto cartesiano por clave duplicada) // --------------------------------------------------------------------------- TEST_CASE("key con duplicados produce producto cartesiano") { // left: 1 row with id=X std::vector lhdr = {"id","lval"}; std::vector ltyp(2, ColumnType::Auto); std::vector lraw = {"X","L1"}; std::vector lptr; for (auto& s : lraw) lptr.push_back(s.c_str()); TableData right; // right: 3 rows with key=X right.build("r", {"key","rval"}, {"X","R1", "X","R2", "X","R3"}); Join jn = make_join(JoinStrategy::Inner, {{"id","key"}}); StageOutput res = join_tables(lptr.data(), 1, 2, lhdr, ltyp, right.ti, jn); // 1 left row x 3 matching right rows = 3 output rows. REQUIRE(res.rows == 3); REQUIRE(std::string(res.cells[0 * 4 + 3]) == "R1"); REQUIRE(std::string(res.cells[1 * 4 + 3]) == "R2"); REQUIRE(std::string(res.cells[2 * 4 + 3]) == "R3"); } // --------------------------------------------------------------------------- // alias en columnas del derecho // --------------------------------------------------------------------------- TEST_CASE("alias prefija columnas del derecho") { std::vector lhdr = {"id"}; std::vector ltyp = {ColumnType::Auto}; std::vector lraw = {"1"}; std::vector lptr; for (auto& s : lraw) lptr.push_back(s.c_str()); TableData right; right.build("r", {"id","score"}, {"1","99"}); Join jn = make_join(JoinStrategy::Inner, {{"id","id"}}, "r"); StageOutput res = join_tables(lptr.data(), 1, 1, lhdr, ltyp, right.ti, jn); REQUIRE(res.cols == 3); REQUIRE(res.headers[1] == "r.id"); REQUIRE(res.headers[2] == "r.score"); } // --------------------------------------------------------------------------- // fields subset: solo algunas columnas del derecho // --------------------------------------------------------------------------- TEST_CASE("fields subset incluye solo columnas especificadas") { std::vector lhdr = {"id"}; std::vector ltyp = {ColumnType::Auto}; std::vector lraw = {"1"}; std::vector lptr; for (auto& s : lraw) lptr.push_back(s.c_str()); TableData right; right.build("r", {"id","score","notes"}, {"1","42","hello"}); Join jn = make_join(JoinStrategy::Inner, {{"id","id"}}, "", {"score"}); StageOutput res = join_tables(lptr.data(), 1, 1, lhdr, ltyp, right.ti, jn); // Only left id + right score (not notes). REQUIRE(res.cols == 2); REQUIRE(res.headers[1] == "score"); } // --------------------------------------------------------------------------- // tabla derecha vacia -> left join devuelve todas las filas de la izquierda // --------------------------------------------------------------------------- TEST_CASE("right table vacia con left join devuelve todas las filas izquierda") { std::vector lhdr = {"id","val"}; std::vector ltyp(2, ColumnType::Auto); std::vector lraw = {"1","a", "2","b", "3","c"}; std::vector lptr; for (auto& s : lraw) lptr.push_back(s.c_str()); TableData right; right.build("r", {"id","extra"}, {}); // 0 rows Join jn = make_join(JoinStrategy::Left, {{"id","id"}}); StageOutput res = join_tables(lptr.data(), 3, 2, lhdr, ltyp, right.ti, jn); REQUIRE(res.rows == 3); // All right cells are empty. for (int r = 0; r < 3; ++r) { REQUIRE(std::string(res.cells[r * 4 + 2]) == ""); REQUIRE(std::string(res.cells[r * 4 + 3]) == ""); } }