Files
egutierrez 837563c3ba feat: funciones Python datascience, finance, cybersecurity y pipelines
Datascience: aggregate_by_group, deduplicate_entities/relations, detect_drift,
diff_entities/relations, extract_entities/relations_llm, hotness_score, melt,
merge_graphs, pivot, build_entity/relation_schema_prompt.
Finance: avellaneda_stoikov_quotes, generate_gbm_prices, generate_taker_order,
hawkes_intensity + módulo finance.py.
Cybersecurity: envelope_encrypt/decrypt + módulo cybersecurity.py.
Pipelines: extraction_pipeline, monte_carlo_market, run_market_sim.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 17:11:32 +02:00

91 lines
3.1 KiB
Python

"""Tests for aggregate_by_group."""
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
from aggregate_by_group import aggregate_by_group
def test_group_by_una_columna_con_sum():
    """Sum a numeric field while grouping on a single column."""
    employees = [
        {"dept": "eng", "salary": 100},
        {"dept": "eng", "salary": 120},
        {"dept": "sales", "salary": 80},
    ]

    grouped = aggregate_by_group(
        employees,
        group_by=["dept"],
        aggs={"salary": "sum"},
    )

    # The input holds two distinct "dept" values, so two rows are expected.
    assert len(grouped) == 2

    # Take the first output row for each department.
    eng_row = next(filter(lambda row: row["dept"] == "eng", grouped))
    sales_row = next(filter(lambda row: row["dept"] == "sales", grouped))
    assert eng_row["salary"] == 220
    assert sales_row["salary"] == 80
def test_group_by_multiples_columnas():
    """Group on two columns at once and sum within each combination."""
    staff = [
        {"dept": "eng", "level": "senior", "salary": 150},
        {"dept": "eng", "level": "junior", "salary": 80},
        {"dept": "eng", "level": "senior", "salary": 160},
        {"dept": "sales", "level": "senior", "salary": 120},
    ]

    def is_eng_senior(row):
        # Select the (eng, senior) group's output row.
        return row["dept"] == "eng" and row["level"] == "senior"

    totals = aggregate_by_group(
        staff,
        group_by=["dept", "level"],
        aggs={"salary": "sum"},
    )

    # Three distinct (dept, level) pairs appear in the input.
    assert len(totals) == 3
    target = next(row for row in totals if is_eng_senior(row))
    assert target["salary"] == 310
def test_agregacion_mean_count_min_max():
    """Check the mean, count, min and max aggregations on a single group."""
    samples = [
        {"cat": "A", "val": 10},
        {"cat": "A", "val": 20},
        {"cat": "A", "val": 30},
    ]

    # Each aggregation is run and verified in turn against the same rows.
    expectations = (
        ("mean", 20.0),
        ("count", 3),
        ("min", 10),
        ("max", 30),
    )
    for agg_name, expected in expectations:
        outcome = aggregate_by_group(samples, group_by=["cat"], aggs={"val": agg_name})
        assert outcome[0]["val"] == expected
def test_collect_acumula_en_lista():
    """Check that "collect" gathers a group's values into a sortable collection."""
    people = [
        {"dept": "eng", "name": "Alice"},
        {"dept": "eng", "name": "Bob"},
        {"dept": "sales", "name": "Carol"},
    ]

    collected = aggregate_by_group(
        people,
        group_by=["dept"],
        aggs={"name": "collect"},
    )

    eng_row = next(filter(lambda row: row["dept"] == "eng", collected))
    # Sort before comparing so the check does not depend on collection order.
    names_in_eng = sorted(eng_row["name"])
    assert names_in_eng == ["Alice", "Bob"]
def test_grupo_con_una_sola_fila():
    """Check that a single input row yields one group with its value unchanged."""
    only_row = {"dept": "eng", "salary": 100}

    summary = aggregate_by_group(
        [only_row],
        group_by=["dept"],
        aggs={"salary": "sum"},
    )

    assert len(summary) == 1
    first = summary[0]
    assert first["salary"] == 100
def test_campo_con_none_se_ignora_en_agregaciones_numericas():
    """Check that None values are skipped by the sum and mean aggregations."""
    payroll = [
        {"dept": "eng", "salary": 100},
        {"dept": "eng", "salary": None},
        {"dept": "eng", "salary": 200},
    ]

    # The mean of 150.0 shows the None row is left out of the divisor as well
    # as the total.
    for agg_name, expected in (("sum", 300), ("mean", 150.0)):
        outcome = aggregate_by_group(payroll, group_by=["dept"], aggs={"salary": agg_name})
        assert outcome[0]["salary"] == expected