837563c3ba
Datascience: aggregate_by_group, deduplicate_entities/relations, detect_drift, diff_entities/relations, extract_entities/relations_llm, hotness_score, melt, merge_graphs, pivot, build_entity/relation_schema_prompt. Finance: avellaneda_stoikov_quotes, generate_gbm_prices, generate_taker_order, hawkes_intensity + módulo finance.py. Cybersecurity: envelope_encrypt/decrypt + módulo cybersecurity.py. Pipelines: extraction_pipeline, monte_carlo_market, run_market_sim. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
91 lines
3.1 KiB
Python
91 lines
3.1 KiB
Python
"""Tests para aggregate_by_group."""
|
|
|
|
import sys
|
|
import os
|
|
sys.path.insert(0, os.path.dirname(__file__))
|
|
|
|
from aggregate_by_group import aggregate_by_group
|
|
|
|
|
|
def test_group_by_una_columna_con_sum():
    """Grouping by a single column sums the values of each group."""
    payroll = [
        {"dept": "eng", "salary": 100},
        {"dept": "eng", "salary": 120},
        {"dept": "sales", "salary": 80},
    ]

    result = aggregate_by_group(payroll, group_by=["dept"], aggs={"salary": "sum"})

    def row_for(dept):
        # First aggregated row whose "dept" equals the requested one.
        return next(row for row in result if row["dept"] == dept)

    assert len(result) == 2
    eng_row = row_for("eng")
    sales_row = row_for("sales")
    assert eng_row["salary"] == 220
    assert sales_row["salary"] == 80
|
|
|
|
|
|
def test_group_by_multiples_columnas():
    """Grouping by several columns yields one row per distinct key combination."""
    staff = [
        {"dept": "eng", "level": "senior", "salary": 150},
        {"dept": "eng", "level": "junior", "salary": 80},
        {"dept": "eng", "level": "senior", "salary": 160},
        {"dept": "sales", "level": "senior", "salary": 120},
    ]
    keys = ["dept", "level"]

    result = aggregate_by_group(staff, group_by=keys, aggs={"salary": "sum"})
    assert len(result) == 3

    # Locate the (eng, senior) group; the keys are checked in order, dept first.
    wanted = {"dept": "eng", "level": "senior"}
    eng_senior = next(
        row
        for row in result
        if all(row[col] == expected for col, expected in wanted.items())
    )
    assert eng_senior["salary"] == 310
|
|
|
|
|
|
def test_agregacion_mean_count_min_max():
    """The mean, count, min and max aggregations each return the expected value."""
    samples = [{"cat": "A", "val": value} for value in (10, 20, 30)]

    # (aggregation name, expected value for the single "A" group), checked in
    # the order mean, count, min, max.
    expectations = (
        ("mean", 20.0),
        ("count", 3),
        ("min", 10),
        ("max", 30),
    )
    for agg_name, expected in expectations:
        outcome = aggregate_by_group(samples, group_by=["cat"], aggs={"val": agg_name})
        assert outcome[0]["val"] == expected
|
|
|
|
|
|
def test_collect_acumula_en_lista():
    """The collect aggregation gathers a group's values into a list."""
    people = [
        {"dept": "eng", "name": "Alice"},
        {"dept": "eng", "name": "Bob"},
        {"dept": "sales", "name": "Carol"},
    ]

    grouped = aggregate_by_group(people, group_by=["dept"], aggs={"name": "collect"})

    eng_row = next(row for row in grouped if row["dept"] == "eng")
    # Sort before comparing so the check does not depend on collection order.
    collected_names = sorted(eng_row["name"])
    assert collected_names == ["Alice", "Bob"]
|
|
|
|
|
|
def test_grupo_con_una_sola_fila():
    """A group made of a single row aggregates to that row's own value."""
    lone_row = {"dept": "eng", "salary": 100}

    result = aggregate_by_group(
        [lone_row],
        group_by=["dept"],
        aggs={"salary": "sum"},
    )

    assert len(result) == 1
    first_group = result[0]
    assert first_group["salary"] == 100
|
|
|
|
|
|
def test_campo_con_none_se_ignora_en_agregaciones_numericas():
    """None values are skipped by the numeric aggregations (sum and mean)."""
    rows = [
        {"dept": "eng", "salary": 100},
        {"dept": "eng", "salary": None},
        {"dept": "eng", "salary": 200},
    ]

    def salary_under(agg_name):
        # Aggregate the salary column and return the first group's value.
        grouped = aggregate_by_group(rows, group_by=["dept"], aggs={"salary": agg_name})
        return grouped[0]["salary"]

    # The expected values count only the two non-None salaries.
    assert salary_under("sum") == 300
    assert salary_under("mean") == 150.0
|