feat: funciones Python datascience, finance, cybersecurity y pipelines

Datascience: aggregate_by_group, deduplicate_entities/relations, detect_drift, diff_entities/relations, extract_entities/relations_llm, hotness_score, melt, merge_graphs, pivot, build_entity/relation_schema_prompt. Finance: avellaneda_stoikov_quotes, generate_gbm_prices, generate_taker_order, hawkes_intensity + módulo finance.py. Cybersecurity: envelope_encrypt/decrypt + módulo cybersecurity.py. Pipelines: extraction_pipeline, monte_carlo_market, run_market_sim. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 17:11:32 +02:00
parent 928a3319d5
commit 837563c3ba
62 changed files with 5376 additions and 0 deletions
@@ -0,0 +1,90 @@
+"""Tests para detect_drift."""
+
+import sys
+import os
+import math
+
+sys.path.insert(0, os.path.dirname(__file__))  # put this file's directory first on sys.path
+from detect_drift import detect_drift
+
+
+def test_campo_con_drift_claro_z_mayor_threshold():  # clear drift: current far below history
+    history = [
+        {"records_out": 100},
+        {"records_out": 105},
+        {"records_out": 98},
+    ]
+    current = {"records_out": 50}  # roughly half of every historical value
+    results = detect_drift(history, current, ["records_out"])
+    assert len(results) == 1  # one field requested, one result expected
+    r = results[0]
+    assert r["field"] == "records_out"
+    assert r["current"] == 50.0
+    assert r["drifted"] is True
+    assert r["z_score"] < -2.0  # very far from the mean, on the low side
+
+
+def test_campo_estable_z_menor_threshold():  # stable field: current value close to history
+    history = [
+        {"val": 100.0},
+        {"val": 102.0},
+        {"val": 98.0},
+        {"val": 101.0},
+    ]
+    current = {"val": 100.5}  # within the normal range
+    results = detect_drift(history, current, ["val"])
+    assert len(results) == 1
+    r = results[0]
+    assert r["drifted"] is False
+    assert abs(r["z_score"]) < 2.0  # magnitude check, so either sign is accepted
+
+
+def test_historial_con_un_solo_punto_std_0_drifted_False_con_nota():  # single-point history
+    history = [{"val": 100.0}]
+    current = {"val": 999.0}  # far from the only historical value, yet must not be flagged
+    results = detect_drift(history, current, ["val"])
+    assert len(results) == 1
+    r = results[0]  # NOTE(review): name says "con_nota" (with note) but no note is asserted below
+    assert r["std"] == 0.0  # a single sample has no spread
+    assert r["z_score"] == 0.0  # z_score is expected to be 0.0 when std is 0
+    assert r["drifted"] is False
+    assert r["mean"] == 100.0  # mean equals the only historical value
+
+
+def test_historial_vacio_todos_drifted_False():  # empty history: no field may be flagged
+    history = []
+    current = {"records_out": 50, "duration_ms": 2000}
+    results = detect_drift(history, current, ["records_out", "duration_ms"])
+    assert len(results) == 2  # two fields requested, two results expected
+    for r in results:
+        assert r["drifted"] is False
+        assert r["z_score"] == 0.0
+        assert r["mean"] == 0.0  # mean is expected to be 0.0 when there is no history
+
+
+def test_threshold_custom():  # same data, two thresholds: the flag must follow the threshold
+    history = [
+        {"val": 100.0},
+        {"val": 100.0},
+        {"val": 110.0},
+        {"val": 90.0},
+    ]
+    # mean = 100, population std = sqrt(50) ~ 7.07 (a sample std ~ 8.16 would give z ~ 1.84)
+    current = {"val": 115.0}  # z ~ 2.12
+
+    # threshold 2.0, passed explicitly (presumably also the default - confirm) -> drifted
+    results = detect_drift(history, current, ["val"], threshold=2.0)
+    assert results[0]["drifted"] is True
+
+    # threshold 3.0 -> not drifted
+    results2 = detect_drift(history, current, ["val"], threshold=3.0)
+    assert results2[0]["drifted"] is False
+
+
+if __name__ == "__main__":  # direct-run entry point: call every test, then report success
+    test_campo_con_drift_claro_z_mayor_threshold()
+    test_campo_estable_z_menor_threshold()
+    test_historial_con_un_solo_punto_std_0_drifted_False_con_nota()
+    test_historial_vacio_todos_drifted_False()
+    test_threshold_custom()
+    print("All tests passed.")  # only reached if no assertion above raised