Files
fn_registry/cpp/tests/test_compute_column_stats.cpp
egutierrez a03675113a chore: auto-commit (286 archivos)
- .claude/agents/fn-orquestador/SKILL.md
- .claude/commands/fn_claude.md
- .claude/rules/INDEX.md
- .claude/rules/cpp_apps.md
- .claude/rules/ids_naming.md
- CHANGELOG.md
- apps/dag_engine/README.md
- apps/dag_engine/api.go
- apps/dag_engine/dags_migrated/example.yaml
- apps/dag_engine/dags_migrated/example_lineage_tracking.yaml
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 16:33:22 +02:00

146 lines
6.0 KiB
C++

// Tests para compute_column_stats (cpp/functions/core/compute_column_stats).
// Pura: sin ImGui context, sin I/O.
#define CATCH_CONFIG_MAIN
#include "catch_amalgamated.hpp"
#include "core/compute_column_stats.h"
#include <cmath>
using namespace data_table;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
// Relative error of `got` with respect to `expected`.
// When `expected` is exactly zero the relative error is undefined, so the
// absolute value of `got` is returned instead (avoids a division by zero).
static double absrel(double got, double expected) {
    const double scale = std::fabs(expected);
    return (scale == 0.0) ? std::fabs(got)
                          : std::fabs(got - expected) / scale;
}
// ---------------------------------------------------------------------------
// stats sobre vector numerico conocido
// ---------------------------------------------------------------------------
TEST_CASE("compute_column_stats: media correcta sobre vector numerico") {
    // Single column holding 1..5: all five cells are expected to be counted
    // as numeric, none as empty, and the mean is (1+2+3+4+5)/5 = 3.0.
    std::vector<const char*> cells = {"1", "2", "3", "4", "5"};
    const ColStats stats = compute_column_stats(cells.data(), 5, 1, 0);

    REQUIRE(stats.total == 5);
    REQUIRE(stats.empty_count == 0);
    REQUIRE(stats.numeric_count == 5);
    REQUIRE(stats.numeric == true);
    // Compare the floating-point mean with a relative tolerance, not exactly.
    REQUIRE(absrel(stats.mean, 3.0) < 1e-9);
}
// ---------------------------------------------------------------------------
// p50 = mediana
// ---------------------------------------------------------------------------
TEST_CASE("compute_column_stats: p50 es mediana") {
    // Odd-length sorted input [1,2,3,4,5]: the median is the middle element, 3.
    std::vector<const char*> cells = {"1", "2", "3", "4", "5"};
    const ColStats stats = compute_column_stats(cells.data(), 5, 1, 0);

    REQUIRE(absrel(stats.p50, 3.0) < 1e-9);
}
TEST_CASE("compute_column_stats: p25 y p75 correctos") {
    // Even-length input [1,2,3,4]: the expected quartiles assume linear
    // interpolation between neighbouring values:
    //   p25 = 1.75, p50 = 2.5, p75 = 3.25
    std::vector<const char*> cells = {"1", "2", "3", "4"};
    const ColStats stats = compute_column_stats(cells.data(), 4, 1, 0);

    REQUIRE(absrel(stats.p25, 1.75) < 1e-9);
    REQUIRE(absrel(stats.p50, 2.5) < 1e-9);
    REQUIRE(absrel(stats.p75, 3.25) < 1e-9);
}
// ---------------------------------------------------------------------------
// missing count
// ---------------------------------------------------------------------------
TEST_CASE("compute_column_stats: conteo de vacios correcto") {
    // 5 cells in total: 2 numeric values ("5" and "10") and 3 empties
    // (two empty strings plus one nullptr). Both "" and nullptr are expected
    // to be counted as empty cells.
    std::vector<const char*> cells = {"", "5", nullptr, "10", ""};
    const ColStats stats = compute_column_stats(cells.data(), 5, 1, 0);

    REQUIRE(stats.total == 5);
    REQUIRE(stats.empty_count == 3);
    REQUIRE(stats.numeric_count == 2);
}
// ---------------------------------------------------------------------------
// columna de texto: no es numerica
// ---------------------------------------------------------------------------
TEST_CASE("compute_column_stats: columna texto no es numerica") {
    // Three distinct non-numeric strings: the column must not be flagged as
    // numeric, no cell counts as a number, and all three values are unique.
    std::vector<const char*> cells = {"Alice", "Bob", "Carol"};
    const ColStats stats = compute_column_stats(cells.data(), 3, 1, 0);

    REQUIRE(stats.numeric == false);
    REQUIRE(stats.numeric_count == 0);
    REQUIRE(stats.unique_count == 3);
}
// ---------------------------------------------------------------------------
// unique_count
// ---------------------------------------------------------------------------
TEST_CASE("compute_column_stats: unique_count correcto") {
    // Six cells but only three distinct values ("a", "b", "c"); with so few
    // distinct values the unique counter is not expected to be capped.
    std::vector<const char*> cells = {"a", "b", "a", "c", "b", "a"};
    const ColStats stats = compute_column_stats(cells.data(), 6, 1, 0);

    REQUIRE(stats.unique_count == 3);
    REQUIRE(stats.unique_capped == false);
}
// ---------------------------------------------------------------------------
// top_categories: la mas frecuente es la primera
// ---------------------------------------------------------------------------
TEST_CASE("compute_column_stats: top_categories ordena por frecuencia desc") {
    // Frequencies in the fixture: "x" -> 4, "y" -> 2, "z" -> 1.
    std::vector<const char*> cells = {"x", "x", "y", "x", "z", "y", "x"};
    const ColStats stats = compute_column_stats(cells.data(), 7, 1, 0);

    // Guard the index access below: at least one category must be reported.
    REQUIRE(stats.top_categories.size() >= 1);
    // Only the head of the list is checked: most frequent value and its count.
    REQUIRE(stats.top_categories[0].first == "x");
    REQUIRE(stats.top_categories[0].second == 4);
}
// ---------------------------------------------------------------------------
// indices: solo las filas indicadas
// ---------------------------------------------------------------------------
TEST_CASE("compute_column_stats: indices filtra filas correctamente") {
    // Full column: [10, 20, 30, 40, 50].
    // Restricting to rows 0, 2 and 4 leaves [10, 30, 50], whose mean is 30.
    std::vector<const char*> cells = {"10", "20", "30", "40", "50"};
    int selected_rows[] = {0, 2, 4};

    // The last two arguments are the row-index array and its length.
    // NOTE(review): 100000 is the fifth positional argument; its meaning is
    // defined by the compute_column_stats signature, not visible here.
    const ColStats stats =
        compute_column_stats(cells.data(), 5, 1, 0, 100000, selected_rows, 3);

    // Totals must reflect the 3 selected rows, not the 5 rows of the column.
    REQUIRE(stats.total == 3);
    REQUIRE(stats.numeric_count == 3);
    REQUIRE(absrel(stats.mean, 30.0) < 1e-9);
}
// ---------------------------------------------------------------------------
// histograma: se genera para columnas numericas
// ---------------------------------------------------------------------------
TEST_CASE("compute_column_stats: histograma generado para numerica") {
    std::vector<const char*> cells = {"1", "2", "3", "4", "5"};
    const ColStats stats = compute_column_stats(cells.data(), 5, 1, 0);

    // A numeric column must produce exactly HIST_BINS histogram bins.
    // Named cast instead of a C-style cast to make the conversion explicit.
    REQUIRE(stats.hist.size() == static_cast<size_t>(HIST_BINS));

    // The bin values summed together must account for all 5 numeric cells.
    float total_hist = 0.0f;
    for (const float bin : stats.hist) total_hist += bin;
    REQUIRE(total_hist == Catch::Approx(5.0f));
}
// ---------------------------------------------------------------------------
// columna totalmente vacia
// ---------------------------------------------------------------------------
TEST_CASE("compute_column_stats: columna vacia retorna stats en cero") {
    // Every cell is empty (empty string or nullptr): all rows are counted as
    // empty, nothing is numeric, and no histogram is produced.
    std::vector<const char*> cells = {"", nullptr, ""};
    const ColStats stats = compute_column_stats(cells.data(), 3, 1, 0);

    REQUIRE(stats.total == 3);
    REQUIRE(stats.empty_count == 3);
    REQUIRE(stats.numeric == false);
    REQUIRE(stats.numeric_count == 0);
    REQUIRE(stats.hist.empty());
}
// ---------------------------------------------------------------------------
// col fuera de rango devuelve stats por defecto
// ---------------------------------------------------------------------------
TEST_CASE("compute_column_stats: col fuera de rango devuelve ColStats defecto") {
    // The table is declared with a single column, so column index 5 is out of
    // range; the result is expected to report zero rows.
    std::vector<const char*> cells = {"1", "2"};
    const ColStats stats = compute_column_stats(cells.data(), 2, 1, 5);

    REQUIRE(stats.total == 0);
}