// Tests para compute_column_stats (cpp/functions/core/compute_column_stats). // Pura: sin ImGui context, sin I/O. #define CATCH_CONFIG_MAIN #include "catch_amalgamated.hpp" #include "core/compute_column_stats.h" #include using namespace data_table; // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- static double absrel(double got, double expected) { if (expected == 0.0) return std::abs(got); return std::abs(got - expected) / std::abs(expected); } // --------------------------------------------------------------------------- // stats sobre vector numerico conocido // --------------------------------------------------------------------------- TEST_CASE("compute_column_stats: media correcta sobre vector numerico") { // 1,2,3,4,5 -> mean = 3.0 std::vector data = {"1", "2", "3", "4", "5"}; ColStats s = compute_column_stats(data.data(), 5, 1, 0); REQUIRE(s.total == 5); REQUIRE(s.empty_count == 0); REQUIRE(s.numeric_count == 5); REQUIRE(s.numeric == true); REQUIRE(absrel(s.mean, 3.0) < 1e-9); } // --------------------------------------------------------------------------- // p50 = mediana // --------------------------------------------------------------------------- TEST_CASE("compute_column_stats: p50 es mediana") { // [1,2,3,4,5] -> p50=3 std::vector data = {"1", "2", "3", "4", "5"}; ColStats s = compute_column_stats(data.data(), 5, 1, 0); REQUIRE(absrel(s.p50, 3.0) < 1e-9); } TEST_CASE("compute_column_stats: p25 y p75 correctos") { // [1,2,3,4] -> p25=1.75, p50=2.5, p75=3.25 (interpolacion lineal) std::vector data = {"1", "2", "3", "4"}; ColStats s = compute_column_stats(data.data(), 4, 1, 0); REQUIRE(absrel(s.p25, 1.75) < 1e-9); REQUIRE(absrel(s.p50, 2.5) < 1e-9); REQUIRE(absrel(s.p75, 3.25) < 1e-9); } // --------------------------------------------------------------------------- // missing count // 
--------------------------------------------------------------------------- TEST_CASE("compute_column_stats: conteo de vacios correcto") { // 3 valores, 2 vacios std::vector data = {"", "5", nullptr, "10", ""}; ColStats s = compute_column_stats(data.data(), 5, 1, 0); REQUIRE(s.total == 5); REQUIRE(s.empty_count == 3); REQUIRE(s.numeric_count == 2); } // --------------------------------------------------------------------------- // columna de texto: no es numerica // --------------------------------------------------------------------------- TEST_CASE("compute_column_stats: columna texto no es numerica") { std::vector data = {"Alice", "Bob", "Carol"}; ColStats s = compute_column_stats(data.data(), 3, 1, 0); REQUIRE(s.numeric == false); REQUIRE(s.numeric_count == 0); REQUIRE(s.unique_count == 3); } // --------------------------------------------------------------------------- // unique_count // --------------------------------------------------------------------------- TEST_CASE("compute_column_stats: unique_count correcto") { std::vector data = {"a", "b", "a", "c", "b", "a"}; ColStats s = compute_column_stats(data.data(), 6, 1, 0); REQUIRE(s.unique_count == 3); REQUIRE(s.unique_capped == false); } // --------------------------------------------------------------------------- // top_categories: la mas frecuente es la primera // --------------------------------------------------------------------------- TEST_CASE("compute_column_stats: top_categories ordena por frecuencia desc") { // "x" aparece 4 veces, "y" 2, "z" 1 std::vector data = {"x", "x", "y", "x", "z", "y", "x"}; ColStats s = compute_column_stats(data.data(), 7, 1, 0); REQUIRE(s.top_categories.size() >= 1); REQUIRE(s.top_categories[0].first == "x"); REQUIRE(s.top_categories[0].second == 4); } // --------------------------------------------------------------------------- // indices: solo las filas indicadas // --------------------------------------------------------------------------- 
TEST_CASE("compute_column_stats: indices filtra filas correctamente") {
    // Column: [10, 20, 30, 40, 50]
    // Only rows 0, 2, 4 -> [10, 30, 50] -> mean = 30
    std::vector<const char*> data = {"10", "20", "30", "40", "50"};
    int idx[] = {0, 2, 4};
    // NOTE(review): 100000 is passed positionally ahead of the index array
    // and its length; presumably it is a cap/limit parameter -- confirm
    // against the declaration in core/compute_column_stats.h.
    ColStats s = compute_column_stats(data.data(), 5, 1, 0, 100000, idx, 3);

    REQUIRE(s.total == 3);
    REQUIRE(s.numeric_count == 3);
    REQUIRE(absrel(s.mean, 30.0) < 1e-9);
}

// ---------------------------------------------------------------------------
// Histogram: generated for numeric columns
// ---------------------------------------------------------------------------

TEST_CASE("compute_column_stats: histograma generado para numerica") {
    std::vector<const char*> data = {"1", "2", "3", "4", "5"};
    ColStats s = compute_column_stats(data.data(), 5, 1, 0);

    REQUIRE(s.hist.size() == static_cast<size_t>(HIST_BINS));

    // The bin values must add up to the number of numeric cells (5).
    float total_hist = 0;
    for (float v : s.hist) {
        total_hist += v;
    }
    REQUIRE(total_hist == Catch::Approx(5.0f));
}

// ---------------------------------------------------------------------------
// Completely empty column
// ---------------------------------------------------------------------------

TEST_CASE("compute_column_stats: columna vacia retorna stats en cero") {
    // The element type is spelled out because class template argument
    // deduction cannot deduce it from a list mixing string literals and
    // nullptr.
    std::vector<const char*> data = {"", nullptr, ""};
    ColStats s = compute_column_stats(data.data(), 3, 1, 0);

    REQUIRE(s.total == 3);
    REQUIRE(s.empty_count == 3);
    REQUIRE(s.numeric == false);
    REQUIRE(s.numeric_count == 0);
    REQUIRE(s.hist.empty());
}

// ---------------------------------------------------------------------------
// Out-of-range column returns default stats
// ---------------------------------------------------------------------------

TEST_CASE("compute_column_stats: col fuera de rango devuelve ColStats defecto") {
    // Column index 5 is requested while the third argument is 1.
    std::vector<const char*> data = {"1", "2"};
    ColStats s = compute_column_stats(data.data(), 2, 1, 5);

    REQUIRE(s.total == 0);
}