Files
fn_registry/cpp/tests/test_join_tables.cpp
egutierrez 212875ed0d chore: auto-commit (286 archivos)
- .claude/agents/fn-orquestador/SKILL.md
- .claude/commands/fn_claude.md
- .claude/rules/INDEX.md
- .claude/rules/cpp_apps.md
- .claude/rules/ids_naming.md
- CHANGELOG.md
- apps/dag_engine/README.md
- apps/dag_engine/api.go
- apps/dag_engine/dags_migrated/example.yaml
- apps/dag_engine/dags_migrated/example_lineage_tracking.yaml
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 16:33:22 +02:00

281 lines
11 KiB
C++

// Tests for data_table::join_tables (cpp/functions/core/join_tables).
// Pure multi-key hash join — no ImGui, no I/O.
#define CATCH_CONFIG_MAIN
#include "catch_amalgamated.hpp"
#include "core/join_tables.h"
using namespace data_table;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
// Test fixture that builds a TableInput from a flat, row-major list of string
// cells and owns all of the backing storage for it.
//
// Ownership: TableData (not the TableInput) owns the data. `ti.cells` is set to
// `cell_ptrs.data()`, and every entry of `cell_ptrs` is the c_str() of a string
// held in `cell_strs`. `ti` must therefore not be used after this TableData is
// destroyed or after build() is called again.
// NOTE(review): a copied TableData presumably keeps a `ti.cells` that refers to
// the source object's buffers -- keep instances in place; confirm against
// TableInput's definition.
struct TableData {
    std::vector<std::string> header_strs;  // column names, one per column
    std::vector<ColumnType>  types;        // ColumnType::Auto for every column
    std::vector<const char*> cell_ptrs;    // c_str() of each entry in cell_strs
    std::vector<std::string> cell_strs;    // owned cell text, row-major
    TableInput ti;                         // the table handed to join_tables

    // Fills every member from `headers` and the row-major `cells_flat`.
    //
    //   name       - stored in ti.name.
    //   headers    - column names; their count becomes ti.cols.
    //   cells_flat - cell values laid out row by row. ti.rows is
    //                cells_flat.size() / headers.size() using integer division,
    //                so a trailing partial row is not counted. With no headers
    //                the table has zero rows.
    void build(const std::string& name,
               std::initializer_list<std::string> headers,
               std::initializer_list<std::string> cells_flat)
    {
        header_strs.assign(headers.begin(), headers.end());
        cell_strs.assign(cells_flat.begin(), cells_flat.end());
        types.assign(header_strs.size(), ColumnType::Auto);

        // Rebuild the pointer table only after cell_strs has its final
        // contents, so each stored c_str() refers to a live string.
        cell_ptrs.clear();
        cell_ptrs.reserve(cell_strs.size());
        for (const auto& cell : cell_strs) cell_ptrs.push_back(cell.c_str());

        const auto col_count = header_strs.size();

        ti.name    = name;
        ti.headers = header_strs;
        ti.types   = types;
        ti.cells   = cell_ptrs.data();
        ti.cols    = static_cast<int>(col_count);
        // Guard the division: zero columns would otherwise divide by zero
        // whenever cells_flat is non-empty.
        ti.rows    = (col_count == 0)
                         ? 0
                         : static_cast<int>(cell_strs.size() / col_count);
    }
};
// Convenience factory for a Join used by the tests below.
//
//   strat  - join strategy to apply.
//   on     - (left column, right column) name pairs to match on.
//   alias  - stored in Join::alias; defaults to the empty string.
//   fields - stored in Join::fields; defaults to an empty list.
static Join make_join(JoinStrategy strat,
                      std::initializer_list<std::pair<std::string,std::string>> on,
                      const std::string& alias = "",
                      std::initializer_list<std::string> fields = {})
{
    Join spec;
    spec.on       = on;
    spec.fields   = fields;
    spec.alias    = alias;
    spec.strategy = strat;
    return spec;
}
// ---------------------------------------------------------------------------
// Basic inner join
// ---------------------------------------------------------------------------
TEST_CASE("inner join basico") {
    // Left table with columns (id, name) and three rows.
    std::vector<std::string> left_cells = {"1","Alice", "2","Bob", "3","Carol"};
    std::vector<const char*> left_ptrs;
    for (const std::string& cell : left_cells) {
        left_ptrs.push_back(cell.c_str());
    }
    std::vector<std::string> left_headers = {"id", "name"};
    std::vector<ColumnType>  left_types   = {ColumnType::Auto, ColumnType::Auto};

    // Right table "orders" with columns (user_id, amount); user_id 2 appears twice.
    TableData orders;
    orders.build("orders", {"user_id","amount"}, {"1","100", "2","200", "2","150"});

    Join join_spec = make_join(JoinStrategy::Inner, {{"id","user_id"}});
    StageOutput out = join_tables(left_ptrs.data(), 3, 2,
                                  left_headers, left_types,
                                  orders.ti, join_spec);

    // Expected matches: Alice->100, Bob->200, Bob->150. Carol has no order.
    REQUIRE(out.rows == 3);
    REQUIRE(out.cols == 4);

    // Header order: left columns first, then the right columns.
    const char* const expected_headers[] = {"id", "name", "user_id", "amount"};
    for (int col = 0; col < 4; ++col) {
        REQUIRE(out.headers[col] == expected_headers[col]);
    }
}
// ---------------------------------------------------------------------------
// Left join with orphans
// ---------------------------------------------------------------------------
TEST_CASE("left join con orphans") {
    // A left join keeps every left row; left rows without a match on the
    // right get empty right-hand cells.
    std::vector<std::string> lhdr = {"id", "name"};
    std::vector<ColumnType> ltyp = {ColumnType::Auto, ColumnType::Auto};
    std::vector<std::string> lraw = {"1","Alice", "2","Bob", "3","Carol"};
    std::vector<const char*> lptr;
    for (auto& s : lraw) lptr.push_back(s.c_str());

    TableData right;
    right.build("orders", {"user_id","amount"}, {"1","100"});

    Join jn = make_join(JoinStrategy::Left, {{"id","user_id"}});
    StageOutput res = join_tables(lptr.data(), 3, 2, lhdr, ltyp, right.ti, jn);

    // Alice matches; Bob and Carol are left orphans -> 3 rows total.
    REQUIRE(res.rows == 3);
    // The cell checks below read a row-major grid four columns wide. Pin the
    // width first so a column-count regression fails here instead of
    // indexing the wrong (or an out-of-range) cell.
    REQUIRE(res.cols == 4);

    const auto cell = [&res](int row, int col) {
        return std::string(res.cells[row * res.cols + col]);
    };

    // Row 0: Alice, amount=100.
    REQUIRE(cell(0, 3) == "100");
    // Row 1: Bob, amount="" (orphan).
    REQUIRE(cell(1, 3) == "");
    // Row 2: Carol, amount="" (orphan).
    REQUIRE(cell(2, 3) == "");
}
// ---------------------------------------------------------------------------
// Right join with orphans
// ---------------------------------------------------------------------------
TEST_CASE("right join con orphans") {
    // A right join keeps every right row; right rows without a match on the
    // left get empty left-hand cells.
    std::vector<std::string> lhdr = {"id", "name"};
    std::vector<ColumnType> ltyp = {ColumnType::Auto, ColumnType::Auto};
    // Only Alice is present on the left.
    std::vector<std::string> lraw = {"1","Alice"};
    std::vector<const char*> lptr;
    for (auto& s : lraw) lptr.push_back(s.c_str());

    TableData right;
    // user_id 1 and 2 on the right; user_id 2 has no match on the left.
    right.build("orders", {"user_id","amount"}, {"1","100", "2","200"});

    Join jn = make_join(JoinStrategy::Right, {{"id","user_id"}});
    StageOutput res = join_tables(lptr.data(), 1, 2, lhdr, ltyp, right.ti, jn);

    // Row 0: Alice+100 (matched). Row 1: ""+""+"2"+"200" (right orphan).
    REQUIRE(res.rows == 2);
    // The cell checks below read a row-major grid four columns wide. Pin the
    // width first so a column-count regression fails here instead of
    // indexing the wrong (or an out-of-range) cell.
    REQUIRE(res.cols == 4);

    const auto cell = [&res](int row, int col) {
        return std::string(res.cells[row * res.cols + col]);
    };

    // Row 0 carries Alice's name.
    REQUIRE(cell(0, 1) == "Alice");
    // Row 1 has an empty left side.
    REQUIRE(cell(1, 0) == "");
    REQUIRE(cell(1, 1) == "");
    // Row 1 right-hand amount is 200.
    REQUIRE(cell(1, 3) == "200");
}
// ---------------------------------------------------------------------------
// full outer join
// ---------------------------------------------------------------------------
TEST_CASE("full outer join") {
    // Left table with columns (id, val): keys A and B.
    std::vector<std::string> left_cells = {"A","1", "B","2"};
    std::vector<const char*> left_ptrs;
    for (auto it = left_cells.begin(); it != left_cells.end(); ++it) {
        left_ptrs.push_back(it->c_str());
    }
    std::vector<ColumnType>  left_types   = {ColumnType::Auto, ColumnType::Auto};
    std::vector<std::string> left_headers = {"id","val"};

    // Right table with columns (key, score): keys B and C, so A has no
    // partner on the right and C has none on the left.
    TableData scores;
    scores.build("r", {"key","score"}, {"B","10", "C","30"});

    Join join_spec = make_join(JoinStrategy::Full, {{"id","key"}});
    StageOutput out = join_tables(left_ptrs.data(), 2, 2,
                                  left_headers, left_types,
                                  scores.ti, join_spec);

    // One row each for A (left orphan), B (matched) and C (right orphan).
    REQUIRE(out.rows == 3);
}
// ---------------------------------------------------------------------------
// multi-key join (2 keys)
// ---------------------------------------------------------------------------
TEST_CASE("multi-key join con 2 keys") {
    // Left table with columns (dept, year, budget).
    std::vector<std::string> left_cells = {
        "eng","2024","100",
        "eng","2025","110",
        "hr", "2024","50"
    };
    std::vector<const char*> left_ptrs;
    for (const std::string& cell : left_cells) {
        left_ptrs.push_back(cell.c_str());
    }
    std::vector<std::string> left_headers = {"dept","year","budget"};
    std::vector<ColumnType>  left_types(3, ColumnType::Auto);

    // Right table with columns (dept, year, headcount); there is no
    // eng-2025 row on this side.
    TableData headcount;
    headcount.build("headcount", {"dept","year","headcount"}, {
        "eng","2024","20",
        "hr", "2024","5"
    });

    // Match on both dept and year.
    Join join_spec = make_join(JoinStrategy::Inner, {{"dept","dept"},{"year","year"}});
    StageOutput out = join_tables(left_ptrs.data(), 3, 3,
                                  left_headers, left_types,
                                  headcount.ti, join_spec);

    // eng-2024 and hr-2024 match; eng-2025 does not -> 2 rows.
    REQUIRE(out.rows == 2);
}
// ---------------------------------------------------------------------------
// Duplicate keys (Cartesian product for a duplicated key)
// ---------------------------------------------------------------------------
TEST_CASE("key con duplicados produce producto cartesiano") {
    // A left row whose key matches several right rows yields one output row
    // per matching right row.
    // Left: 1 row with id=X.
    std::vector<std::string> lhdr = {"id","lval"};
    std::vector<ColumnType> ltyp(2, ColumnType::Auto);
    std::vector<std::string> lraw = {"X","L1"};
    std::vector<const char*> lptr;
    for (auto& s : lraw) lptr.push_back(s.c_str());

    TableData right;
    // Right: 3 rows with key=X.
    right.build("r", {"key","rval"}, {"X","R1", "X","R2", "X","R3"});

    Join jn = make_join(JoinStrategy::Inner, {{"id","key"}});
    StageOutput res = join_tables(lptr.data(), 1, 2, lhdr, ltyp, right.ti, jn);

    // 1 left row x 3 matching right rows = 3 output rows.
    REQUIRE(res.rows == 3);
    // The cell checks below read a row-major grid four columns wide. Pin the
    // width first so a column-count regression fails here instead of
    // indexing the wrong (or an out-of-range) cell.
    REQUIRE(res.cols == 4);

    const auto cell = [&res](int row, int col) {
        return std::string(res.cells[row * res.cols + col]);
    };

    // Matches are expected in the right table's row order.
    REQUIRE(cell(0, 3) == "R1");
    REQUIRE(cell(1, 3) == "R2");
    REQUIRE(cell(2, 3) == "R3");
}
// ---------------------------------------------------------------------------
// Alias applied to the right-hand columns
// ---------------------------------------------------------------------------
TEST_CASE("alias prefija columnas del derecho") {
    // Single-column, single-row left table.
    std::vector<std::string> left_cells = {"1"};
    std::vector<const char*> left_ptrs;
    for (const std::string& cell : left_cells) {
        left_ptrs.push_back(cell.c_str());
    }
    std::vector<std::string> left_headers = {"id"};
    std::vector<ColumnType>  left_types   = {ColumnType::Auto};

    // Right table shares the "id" column name with the left one.
    TableData scores;
    scores.build("r", {"id","score"}, {"1","99"});

    // Join with alias "r".
    Join join_spec = make_join(JoinStrategy::Inner, {{"id","id"}}, "r");
    StageOutput out = join_tables(left_ptrs.data(), 1, 1,
                                  left_headers, left_types,
                                  scores.ti, join_spec);

    // Left id plus both right columns, the latter prefixed with "r.".
    REQUIRE(out.cols == 3);
    REQUIRE(out.headers[1] == "r.id");
    REQUIRE(out.headers[2] == "r.score");
}
// ---------------------------------------------------------------------------
// Fields subset: only some of the right-hand columns
// ---------------------------------------------------------------------------
TEST_CASE("fields subset incluye solo columnas especificadas") {
    // Single-column, single-row left table.
    std::vector<std::string> left_cells = {"1"};
    std::vector<const char*> left_ptrs;
    for (const std::string& cell : left_cells) {
        left_ptrs.push_back(cell.c_str());
    }
    std::vector<std::string> left_headers = {"id"};
    std::vector<ColumnType>  left_types   = {ColumnType::Auto};

    // Right table has three columns, but the join requests only "score".
    TableData details;
    details.build("r", {"id","score","notes"}, {"1","42","hello"});

    Join join_spec = make_join(JoinStrategy::Inner, {{"id","id"}}, "", {"score"});
    StageOutput out = join_tables(left_ptrs.data(), 1, 1,
                                  left_headers, left_types,
                                  details.ti, join_spec);

    // Output is the left id plus the right score; "notes" is left out.
    REQUIRE(out.cols == 2);
    REQUIRE(out.headers[1] == "score");
}
// ---------------------------------------------------------------------------
// Empty right table -> left join returns every row of the left table
// ---------------------------------------------------------------------------
TEST_CASE("right table vacia con left join devuelve todas las filas izquierda") {
    // With no rows on the right, a left join keeps every left row and leaves
    // the right-hand cells empty.
    std::vector<std::string> lhdr = {"id","val"};
    std::vector<ColumnType> ltyp(2, ColumnType::Auto);
    std::vector<std::string> lraw = {"1","a", "2","b", "3","c"};
    std::vector<const char*> lptr;
    for (auto& s : lraw) lptr.push_back(s.c_str());

    TableData right;
    right.build("r", {"id","extra"}, {});  // two columns, 0 rows

    Join jn = make_join(JoinStrategy::Left, {{"id","id"}});
    StageOutput res = join_tables(lptr.data(), 3, 2, lhdr, ltyp, right.ti, jn);

    REQUIRE(res.rows == 3);
    // The cell checks below read a row-major grid four columns wide. Pin the
    // width first so a column-count regression fails here instead of
    // indexing the wrong (or an out-of-range) cell.
    REQUIRE(res.cols == 4);

    const auto cell = [&res](int row, int col) {
        return std::string(res.cells[row * res.cols + col]);
    };

    // Both right-hand cells are empty on every row.
    for (int r = 0; r < 3; ++r) {
        REQUIRE(cell(r, 2) == "");
        REQUIRE(cell(r, 3) == "");
    }
}