d782d463cb
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
366 lines
16 KiB
C++
366 lines
16 KiB
C++
// Logica pura del playground data_table. Sin ImGui — testable headless.
|
|
// Cuando se promueva al registry, esto sera la base de data_table_cpp_viz.
|
|
#pragma once
|
|
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
namespace data_table {
|
|
|
|
// Comparison operators carried by a Filter and accepted by compare().
// Enumerator order is unchanged, so the implicit underlying values
// (Eq == 0 ... EndsWith == 9) stay the same.
enum class Op {
    Eq,
    Neq,
    Gt,
    Gte,
    Lt,
    Lte,
    Contains,
    NotContains,
    StartsWith,
    EndsWith
};
|
|
// Label for an operator.
// NOTE(review): the actual strings live in the implementation file, not here.
const char* op_label(Op o);

// NOTE(review): presumably true for operators that only make sense on text
// (Contains, StartsWith, ...) — confirm against the implementation.
bool op_is_string_only(Op o);
|
|
|
|
// ----------------------------------------------------------------------------
// Column types - declared by the caller, with a fallback to auto-detection.
// ----------------------------------------------------------------------------
// Enumerator order is unchanged (Auto == 0 ... Json == 6).
enum class ColumnType {
    Auto,
    String,
    Int,
    Float,
    Bool,
    Date,
    Json
};
|
|
|
|
// Name of a column type.
// NOTE(review): the exact strings are defined in the implementation.
const char* column_type_name(ColumnType t);

// Icon for a column type, as a UTF-8 encoded Tabler icon.
const char* column_type_icon(ColumnType t);
|
|
|
|
// Ops permitidos para cada tipo. Devuelve vector ordenado.
|
|
std::vector<Op> ops_for_type(ColumnType t);
|
|
|
|
// Auto-detect via sample: escanea hasta `sample_n` celdas no-vacias.
|
|
ColumnType auto_detect_type(const char* const* cells, int rows, int cols,
|
|
int col, int sample_n = 64);
|
|
|
|
// Tipo efectivo: si declared != Auto -> declared; else auto_detect.
|
|
ColumnType effective_type(ColumnType declared,
|
|
const char* const* cells, int rows, int cols, int col);
|
|
|
|
// Derived column: inmutable. Dos modos:
|
|
// 1) Retipo puro: source_col >= 0, formula == "". Cells del origen.
|
|
// 2) Formula: source_col == -1, formula no vacia. Eval por Lua.
|
|
struct DerivedColumn {
|
|
int source_col = -1;
|
|
ColumnType type = ColumnType::String;
|
|
std::string name;
|
|
std::string formula; // "" = retipado puro; resto = body Lua
|
|
int lua_id = -1; // referencia en lua_engine; -1 si no compilado
|
|
std::string compile_error;
|
|
};
|
|
|
|
// Filter movido aqui (antes era despues de State) porque TQL Stage lo necesita.
|
|
struct Filter {
|
|
int col;
|
|
Op op;
|
|
std::string value;
|
|
};
|
|
|
|
// Color rule attached to a column.
// NOTE(review): presumably cells of column `col` whose text equals `equals`
// are drawn with `color`; the packing of `color` is not visible in this
// header — confirm against the renderer.
//
// `col` and `color` have default member initializers so that a
// default-constructed ColorRule never holds indeterminate values. The defaults
// match what value-initialization (`ColorRule{}`) already produced. Under C++14
// and later the struct is still an aggregate, so
// `ColorRule{col, equals, color}` keeps working.
struct ColorRule {
    int          col   = 0;
    std::string  equals;
    unsigned int color = 0;
};
|
|
|
|
// ----------------------------------------------------------------------------
// TQL (Table Query Language) — stage model. See docs/TQL.md.
// ----------------------------------------------------------------------------
// Aggregation functions. Enumerator order is unchanged
// (Count == 0 ... Percentile == 12).
enum class AggFn {
    Count,
    Sum,
    Avg,
    Min,
    Max,
    Distinct,
    Stddev,
    Median,
    P25,
    P75,
    P90,
    P99,
    Percentile
};
|
|
|
|
// Name of an aggregation function.
// NOTE(review): the exact strings are defined in the implementation.
const char* agg_fn_name(AggFn f);
|
|
|
|
struct Aggregation {
|
|
AggFn fn = AggFn::Count;
|
|
std::string col; // ignorado para Count
|
|
double arg = 0.0; // para Percentile (0..1)
|
|
std::string alias; // vacio -> auto-generado via aggregation_alias()
|
|
};
|
|
|
|
// One sort key: a column name plus a direction flag.
struct SortClause {
    std::string col;
    bool        desc = false;  // false unless explicitly set
};
|
|
|
|
// Stage: layer de TQL. Stage 0 = Raw (sin breakouts/aggregations).
|
|
// Stage 1+ pueden agrupar. Cada stage consume output del anterior.
|
|
struct Stage {
|
|
std::vector<Filter> filters;
|
|
std::vector<DerivedColumn> derived; // expressions de este stage
|
|
std::vector<std::string> breakouts; // col names del INPUT de este stage
|
|
std::vector<Aggregation> aggregations;
|
|
std::vector<SortClause> sorts;
|
|
};
|
|
|
|
// Pure: alias por defecto cuando agg.alias esta vacio.
|
|
// count -> "count"
|
|
// distinct col -> "distinct_<col>"
|
|
// percentile p -> "p<arg*100>_<col>" (ej. p95_size_kb)
|
|
// resto -> "<fn>_<col>" (ej. avg_size_kb)
|
|
std::string aggregation_alias(const Aggregation& a);
|
|
|
|
// Pure: tipo del output de la aggregation.
|
|
// count, distinct -> Int
|
|
// sum, avg, stddev,
|
|
// median, p*, percentile -> Float
|
|
// min, max -> mismo tipo que la col origen
|
|
ColumnType aggregation_type(const Aggregation& a,
|
|
const std::vector<std::string>& in_headers,
|
|
const std::vector<ColumnType>& in_types);
|
|
|
|
// Output de compute_stage. Posee `cell_backing` (strings nuevos para
|
|
// resultados agregados) y `cells` (punteros row-major a backing o a
|
|
// `in_cells` original para passthrough).
|
|
struct StageOutput {
|
|
std::vector<std::string> cell_backing;
|
|
std::vector<const char*> cells;
|
|
int rows = 0;
|
|
int cols = 0;
|
|
std::vector<std::string> headers;
|
|
std::vector<ColumnType> types;
|
|
};
|
|
|
|
// Pure: ejecuta un Stage sobre los cells de entrada. Aplica filter -> (group+agg|passthrough) -> sort.
|
|
StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
|
|
const std::vector<std::string>& in_headers,
|
|
const std::vector<ColumnType>& in_types,
|
|
const Stage& stage);
|
|
|
|
// Pure: aplica filtros usando headers para resolver f.col (que ahora es
|
|
// indice en el array de in_headers, no del dataset original). Devuelve
|
|
// indices de filas que pasan.
|
|
std::vector<int> apply_filters(const char* const* cells, int rows, int cols,
|
|
const std::vector<Filter>& filters);
|
|
|
|
// Pure: helper para drill-down. Devuelve un Filter Op::Eq sobre col_idx con
|
|
// el value indicado. col_idx es indice en los headers del INPUT del stage
|
|
// previo (donde se va a aplicar el filtro).
|
|
Filter make_drill_filter(int col_idx, const std::string& value);
|
|
|
|
// ----------------------------------------------------------------------------
// ViewMode: kind of visualization rendered over the active stage's output.
// "Table" is always available. The rest require certain column types.
// ----------------------------------------------------------------------------
// Enumerator order is unchanged (Table == 0 ... Radar == 23).
enum class ViewMode {
    Table,

    // Bars
    Bar,          // horizontal bars: 1 cat + 1 num
    Column,       // vertical bars:   1 cat + 1 num
    GroupedBar,   // 1 cat + N num (side-by-side)
    StackedBar,   // 1 cat + N num (stacked)

    // Lines / area
    Line,         // X + 1..N Y series
    Area,         // shaded to y=0
    Stairs,       // step plot

    // Points
    Scatter,      // X + Y
    Bubble,       // X + Y + size

    // Distribution
    Histogram,    // 1 num
    Histogram2D,  // 2 num
    Heatmap,      // matrix from breakouts
    BoxPlot,      // 1 cat + 1 num (min/p25/p50/p75/max per group)

    // Stems / signals
    Stem,
    ErrorBars,

    // Composition
    Pie,
    Donut,
    Funnel,       // ordered descending bars
    Waterfall,    // running sum

    // Single values
    KPI,          // big text + label
    KPIGrid,      // all aggregations as cards

    // Specialized
    Candlestick,  // OHLC: time + open + high + low + close
    Radar,        // multi-axis (1 cat + N num)
};
|
|
|
|
const char* view_mode_token(ViewMode m); // "table", "bar", ...
|
|
const char* view_mode_label(ViewMode m); // "Table", "Bar (horizontal)", ...
|
|
ViewMode view_mode_from_token(const char* s);
|
|
int view_mode_min_cols(ViewMode m);
|
|
bool view_mode_needs_numeric(ViewMode m);
|
|
bool view_mode_needs_category(ViewMode m);
|
|
// Requiere stage agrupado (breakout+aggregation). Si user esta en stage 0 con
|
|
// uno de estos, conviene auto-promote a stage 1.
|
|
bool view_mode_needs_aggregation(ViewMode m);
|
|
|
|
// Lista completa de modos para el selector UI (orden de display).
|
|
const ViewMode* all_view_modes(int* n_out);
|
|
|
|
// ----------------------------------------------------------------------------
// Joins (MBQL-style). See issue 0078.
// ----------------------------------------------------------------------------
// Enumerator order is unchanged (Left == 0 ... Full == 3).
enum class JoinStrategy {
    Left,
    Inner,
    Right,
    Full
};
|
|
const char* join_strategy_token(JoinStrategy s);
|
|
JoinStrategy join_strategy_from_token(const char* s);
|
|
const char* join_strategy_label(JoinStrategy s);
|
|
|
|
// Tabla extra pasada al render() para joins. Owner externo (caller).
|
|
struct TableInput {
|
|
std::string name; // identificador estable (matchea Join.source)
|
|
std::vector<std::string> headers;
|
|
std::vector<ColumnType> types;
|
|
const char* const* cells = nullptr; // row-major, headers.size() cols x rows filas
|
|
int rows = 0;
|
|
int cols = 0;
|
|
};
|
|
|
|
// Join clause: une la tabla actual con `source` por las parejas `on`,
|
|
// prefijando las cols del derecho con `alias.`.
|
|
struct Join {
|
|
std::string alias;
|
|
std::string source;
|
|
std::vector<std::pair<std::string, std::string>> on; // {left_col, right_col}
|
|
JoinStrategy strategy = JoinStrategy::Left;
|
|
std::vector<std::string> fields; // vacio = all del derecho
|
|
};
|
|
|
|
// Pure: resuelve indice del main entre `tables` segun `main_source`.
|
|
// Vacio -> 0. Nombre desconocido -> 0. tables vacio -> -1.
|
|
int resolve_main_idx(const std::vector<TableInput>& tables, const std::string& main_source);
|
|
|
|
// Pure: aplica un join sobre dos tablas. Resultado: StageOutput con
|
|
// `headers` = left + `<alias>.<right_col>` (filtrado por fields si no vacio).
|
|
StageOutput join_tables(const char* const* left_cells, int left_rows, int left_cols,
|
|
const std::vector<std::string>& left_headers,
|
|
const std::vector<ColumnType>& left_types,
|
|
const TableInput& right,
|
|
const Join& jn);
|
|
|
|
// ViewConfig: manual overrides of auto-detection for the active view.
// Empty fields -> auto. If a column name does not exist in the output, the
// viz falls back to auto.
struct ViewConfig {
    std::string              x_col;                  // single: scatter, line, hist2d
    std::vector<std::string> y_cols;                 // 1..N: line/area/bar/etc
    std::string              size_col;               // bubble
    std::string              cat_col;                // bar/pie/funnel/box override
    unsigned int             primary_color = 0;      // 0 = ImPlot auto
    int                      hist_bins     = 0;      // 0 = Sturges
    float                    pie_radius    = 0.0f;   // 0 = default
    bool                     show_legend   = true;
    bool                     show_markers  = false;  // line/area markers
    bool                     locked        = false;  // disable pan/zoom
    mutable bool             fit_request   = false;  // consumed by viz::render; mutable so it can be reset through a const ViewConfig
};
|
|
|
|
// VizPanel: viz adicional sobre el mismo StageOutput. State.display + viz_config
|
|
// es el panel 0 (siempre visible); extra_panels son los aniadidos por el user.
|
|
struct VizPanel {
|
|
ViewMode display = ViewMode::Bar;
|
|
ViewConfig config;
|
|
// Memoria del ultimo non-Table display para toggle Table<->View.
|
|
mutable ViewMode last_non_table = ViewMode::Bar;
|
|
};
|
|
|
|
// State: stage pipeline + viz globales.
|
|
//
|
|
// `stages` siempre tiene tamaño >= 1 (auto-init en compute_visible_rows / render
|
|
// si esta vacio: se crea stages[0] vacio). Stage 0 es Raw (filters + derived +
|
|
// sorts; SIN breakouts/aggregations). Stages 1+ pueden agrupar.
|
|
//
|
|
// `active_stage` = indice del stage cuyo output se renderiza.
|
|
// `col_visible/col_order/color_rules` aplican al output del stage activo.
|
|
struct State {
|
|
std::vector<Stage> stages;
|
|
int active_stage = 0;
|
|
ViewMode display = ViewMode::Table;
|
|
ViewConfig viz_config;
|
|
std::vector<VizPanel> extra_panels;
|
|
std::vector<Join> joins; // aplicado antes de stages[0]
|
|
std::string main_source; // name de TableInput a usar como main; vacio -> tables[0]
|
|
|
|
std::vector<ColorRule> color_rules;
|
|
std::vector<bool> col_visible; // size = effective_cols del stage activo
|
|
std::vector<int> col_order; // permutacion [0..effective_cols)
|
|
|
|
// --- Compat helpers: shortcuts a stages[0] (Raw) ---
|
|
// Util tras refactor para tests / accesos puntuales. Garantizan stages[0]
|
|
// existe (lo crean vacio si no).
|
|
Stage& raw();
|
|
const Stage& raw() const;
|
|
Stage& active();
|
|
const Stage& active_const() const;
|
|
void ensure_stage0();
|
|
};
|
|
|
|
// Parses "1.23" -> out = 1.23, returns true. Returns false when the cell is
// not a complete number.
bool parse_number(const char* s, double& out);
|
|
|
|
// Compara dos celdas con operador. Numerico si ambas parseables; lexical si no.
|
|
bool compare(const char* a, const char* b, Op op);
|
|
|
|
// Aplica filtros y ordena. Devuelve indices de filas visibles.
|
|
std::vector<int> compute_visible_rows(const char* const* cells,
|
|
int rows, int cols,
|
|
const State& st);
|
|
|
|
// Pure: muta col_order de st para colocar `src` en la posicion (en orden visual)
|
|
// donde estaba `dst`. No-op si src == dst o cualquiera fuera del array.
|
|
void reorder_column(State& st, int src, int dst);
|
|
|
|
// Pure: given a buffer and a cursor position, finds the most recent `[` that
// has been opened but not closed. Returns its index (or -1 if there is none).
// Fills `filter_text` with the characters between the `[` and the cursor.
// Used for formula autocomplete: when the user types `[`, the ImGui callback
// detects it and shows a popup with the available columns.
int find_open_bracket(const char* buf, int len, int cursor, std::string& filter_text);
|
|
|
|
// Pure: replaces src[start..cursor) with "[name]". Returns the new string and
// sets `new_cursor` to the position right after the `]`.
std::string insert_column_ref(const std::string& src, int start, int cursor,
                              const std::string& name, int& new_cursor);
|
|
|
|
// CSV: escapes one cell per RFC 4180 (wrapped in " when it contains a comma,
// a double quote or a newline).
std::string csv_escape(const char* s);
|
|
|
|
// Builds the TSV for a selection rectangle. Headers are ALWAYS included.
//   view_row_lo/hi: indices into visible_rows.
//   view_col_lo/hi: indices into col_order. Hidden columns are skipped.
// NOTE(review): whether the lo/hi bounds are inclusive is not visible in this
// header — confirm against the implementation.
std::string build_tsv(const char* const* cells, int rows, int cols,
                      const char* const* headers,
                      const std::vector<int>& col_order,
                      const std::vector<bool>& col_visible,
                      const std::vector<int>& visible_rows,
                      int view_row_lo, int view_row_hi,
                      int view_col_lo, int view_col_hi);
|
|
|
|
// Builds the CSV of the full visible view. Headers included, cells escaped.
std::string build_csv(const char* const* cells, int rows, int cols,
                      const char* const* headers,
                      const std::vector<int>& col_order,
                      const std::vector<bool>& col_visible,
                      const std::vector<int>& visible_rows);
|
|
|
|
// Per-column statistics, as returned by compute_column_stats.
struct ColStats {
    int    total          = 0;      // rows scanned
    int    empty_count    = 0;      // cells == "" or null
    int    unique_count   = 0;      // distinct values (configurable cap)
    bool   unique_capped  = false;  // true if the cap was reached
    bool   numeric        = false;  // true if every non-empty cell parses as a number
    int    numeric_count  = 0;
    double min            = 0;
    double max            = 0;
    double sum            = 0;
    double mean           = 0;
    double p25            = 0;
    double p50            = 0;
    double p75            = 0;
    std::vector<float> hist;                                  // bins (HIST_BINS) when numeric
    std::vector<std::pair<std::string, int>> top_categories;  // top 8 by count, descending
};
|
|
|
|
// Histogram bin count; ColStats::hist is documented as holding this many bins
// for numeric columns.
constexpr int HIST_BINS{24};
|
|
|
|
// Pure: escanea una columna y devuelve estadisticas. `unique_cap` corta el
|
|
// conteo de unicos si excede (para datasets de millones). 0 = sin cap.
|
|
// Si `indices != nullptr` y `n_indices > 0`, recorre solo las filas indicadas
|
|
// (uso tipico: stats sobre filas visibles post-filtro).
|
|
ColStats compute_column_stats(const char* const* cells, int rows, int cols,
|
|
int col, int unique_cap = 100000,
|
|
const int* indices = nullptr, int n_indices = 0);
|
|
|
|
} // namespace data_table
|