a03675113a
- .claude/agents/fn-orquestador/SKILL.md - .claude/commands/fn_claude.md - .claude/rules/INDEX.md - .claude/rules/cpp_apps.md - .claude/rules/ids_naming.md - CHANGELOG.md - apps/dag_engine/README.md - apps/dag_engine/api.go - apps/dag_engine/dags_migrated/example.yaml - apps/dag_engine/dags_migrated/example_lineage_tracking.yaml - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
146 lines
6.0 KiB
C++
146 lines
6.0 KiB
C++
// Tests para compute_column_stats (cpp/functions/core/compute_column_stats).
|
|
// Pura: sin ImGui context, sin I/O.
|
|
|
|
#define CATCH_CONFIG_MAIN
|
|
#include "catch_amalgamated.hpp"
|
|
|
|
#include "core/compute_column_stats.h"
|
|
|
|
#include <cmath>
|
|
|
|
using namespace data_table;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Helpers
|
|
// ---------------------------------------------------------------------------
|
|
/// Relative error of `got` with respect to `expected`.
///
/// Returns |got - expected| / |expected|. When `expected` is exactly zero the
/// ratio is undefined, so the absolute value |got| is returned instead.
static double absrel(double got, double expected) {
    // Exact comparison is intentional: only a literal zero reference value
    // needs the absolute-error fallback.
    if (expected == 0.0) return std::abs(got);
    return std::abs(got - expected) / std::abs(expected);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// stats sobre vector numerico conocido
|
|
// ---------------------------------------------------------------------------
|
|
TEST_CASE("compute_column_stats: media correcta sobre vector numerico") {
    // Single column holding 1,2,3,4,5 -> mean = 3.0.
    std::vector<const char*> data = {"1", "2", "3", "4", "5"};
    // Row count is taken from the fixture so it cannot drift from the data.
    const int rows = static_cast<int>(data.size());

    const ColStats s = compute_column_stats(data.data(), rows, 1, 0);

    REQUIRE(s.total == 5);
    REQUIRE(s.empty_count == 0);
    REQUIRE(s.numeric_count == 5);
    REQUIRE(s.numeric);
    REQUIRE(absrel(s.mean, 3.0) < 1e-9);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// p50 = mediana
|
|
// ---------------------------------------------------------------------------
|
|
TEST_CASE("compute_column_stats: p50 es mediana") {
    // [1,2,3,4,5] -> p50 = 3 (the middle element of an odd-length column).
    std::vector<const char*> data = {"1", "2", "3", "4", "5"};
    // Row count is taken from the fixture so it cannot drift from the data.
    const int rows = static_cast<int>(data.size());

    const ColStats s = compute_column_stats(data.data(), rows, 1, 0);

    REQUIRE(absrel(s.p50, 3.0) < 1e-9);
}
|
|
|
|
TEST_CASE("compute_column_stats: p25 y p75 correctos") {
    // [1,2,3,4] -> p25 = 1.75, p50 = 2.5, p75 = 3.25 (linear interpolation
    // between neighbouring values).
    std::vector<const char*> data = {"1", "2", "3", "4"};
    // Row count is taken from the fixture so it cannot drift from the data.
    const int rows = static_cast<int>(data.size());

    const ColStats s = compute_column_stats(data.data(), rows, 1, 0);

    REQUIRE(absrel(s.p25, 1.75) < 1e-9);
    REQUIRE(absrel(s.p50, 2.5) < 1e-9);
    REQUIRE(absrel(s.p75, 3.25) < 1e-9);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// missing count
|
|
// ---------------------------------------------------------------------------
|
|
TEST_CASE("compute_column_stats: conteo de vacios correcto") {
    // 2 values ("5", "10") and 3 empties: two empty strings plus one nullptr.
    // The test expects a nullptr cell to be counted as empty.
    std::vector<const char*> data = {"", "5", nullptr, "10", ""};
    // Row count is taken from the fixture so it cannot drift from the data.
    const int rows = static_cast<int>(data.size());

    const ColStats s = compute_column_stats(data.data(), rows, 1, 0);

    REQUIRE(s.total == 5);
    REQUIRE(s.empty_count == 3);
    REQUIRE(s.numeric_count == 2);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// columna de texto: no es numerica
|
|
// ---------------------------------------------------------------------------
|
|
TEST_CASE("compute_column_stats: columna texto no es numerica") {
    // Three distinct non-numeric strings: the column must not be classified
    // as numeric, and every value counts as unique.
    std::vector<const char*> data = {"Alice", "Bob", "Carol"};
    // Row count is taken from the fixture so it cannot drift from the data.
    const int rows = static_cast<int>(data.size());

    const ColStats s = compute_column_stats(data.data(), rows, 1, 0);

    REQUIRE_FALSE(s.numeric);
    REQUIRE(s.numeric_count == 0);
    REQUIRE(s.unique_count == 3);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// unique_count
|
|
// ---------------------------------------------------------------------------
|
|
TEST_CASE("compute_column_stats: unique_count correcto") {
    // Six cells but only three distinct values: "a", "b", "c".
    std::vector<const char*> data = {"a", "b", "a", "c", "b", "a"};
    // Row count is taken from the fixture so it cannot drift from the data.
    const int rows = static_cast<int>(data.size());

    const ColStats s = compute_column_stats(data.data(), rows, 1, 0);

    REQUIRE(s.unique_count == 3);
    REQUIRE_FALSE(s.unique_capped);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// top_categories: la mas frecuente es la primera
|
|
// ---------------------------------------------------------------------------
|
|
TEST_CASE("compute_column_stats: top_categories ordena por frecuencia desc") {
    // "x" appears 4 times, "y" 2 times, "z" once.
    std::vector<const char*> data = {"x", "x", "y", "x", "z", "y", "x"};
    // Row count is taken from the fixture so it cannot drift from the data.
    const int rows = static_cast<int>(data.size());

    const ColStats s = compute_column_stats(data.data(), rows, 1, 0);

    // Guard the [0] accesses below: the list must hold at least one entry.
    REQUIRE_FALSE(s.top_categories.empty());
    // The most frequent value comes first, paired with its occurrence count.
    REQUIRE(s.top_categories[0].first == "x");
    REQUIRE(s.top_categories[0].second == 4);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// indices: solo las filas indicadas
|
|
// ---------------------------------------------------------------------------
|
|
TEST_CASE("compute_column_stats: indices filtra filas correctamente") {
    // Column: [10, 20, 30, 40, 50].
    // Only rows 0, 2, 4 are selected -> [10, 30, 50] -> mean = 30.
    std::vector<const char*> data = {"10", "20", "30", "40", "50"};
    int idx[] = {0, 2, 4};
    // Both counts are derived from the fixtures so they cannot drift apart.
    const int rows = static_cast<int>(data.size());
    const int idx_count = static_cast<int>(sizeof(idx) / sizeof(idx[0]));

    // NOTE(review): 100000 is forwarded as the fifth argument exactly as in
    // the original test; its meaning is not visible from this file
    // (presumably a cap on tracked unique values) — confirm against
    // core/compute_column_stats.h.
    const ColStats s =
        compute_column_stats(data.data(), rows, 1, 0, 100000, idx, idx_count);

    // Totals reflect the selected rows only, not the full column.
    REQUIRE(s.total == 3);
    REQUIRE(s.numeric_count == 3);
    REQUIRE(absrel(s.mean, 30.0) < 1e-9);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// histograma: se genera para columnas numericas
|
|
// ---------------------------------------------------------------------------
|
|
TEST_CASE("compute_column_stats: histograma generado para numerica") {
    std::vector<const char*> data = {"1", "2", "3", "4", "5"};
    // Row count is taken from the fixture so it cannot drift from the data.
    const int rows = static_cast<int>(data.size());

    const ColStats s = compute_column_stats(data.data(), rows, 1, 0);

    // A numeric column must produce exactly HIST_BINS buckets.
    REQUIRE(s.hist.size() == static_cast<size_t>(HIST_BINS));

    // The bucket values must add up to the number of numeric cells (5).
    float total_hist = 0.0f;
    for (const float v : s.hist) total_hist += v;
    REQUIRE(total_hist == Catch::Approx(5.0f));
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// columna totalmente vacia
|
|
// ---------------------------------------------------------------------------
|
|
TEST_CASE("compute_column_stats: columna vacia retorna stats en cero") {
    // Every cell is empty: two empty strings and one nullptr.
    std::vector<const char*> data = {"", nullptr, ""};
    // Row count is taken from the fixture so it cannot drift from the data.
    const int rows = static_cast<int>(data.size());

    const ColStats s = compute_column_stats(data.data(), rows, 1, 0);

    REQUIRE(s.total == 3);
    REQUIRE(s.empty_count == 3);
    // With no values at all the column is not numeric and has no histogram.
    REQUIRE_FALSE(s.numeric);
    REQUIRE(s.numeric_count == 0);
    REQUIRE(s.hist.empty());
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// col fuera de rango devuelve stats por defecto
|
|
// ---------------------------------------------------------------------------
|
|
TEST_CASE("compute_column_stats: col fuera de rango devuelve ColStats defecto") {
    std::vector<const char*> data = {"1", "2"};
    // Row count is taken from the fixture so it cannot drift from the data.
    const int rows = static_cast<int>(data.size());

    // The table is declared with a single column (third argument), so column
    // index 5 (fourth argument) is out of range.
    const ColStats s = compute_column_stats(data.data(), rows, 1, 5);

    // The test expects an untouched result: no rows were counted.
    REQUIRE(s.total == 0);
}
|