feat: funciones Python datascience, finance, cybersecurity y pipelines
Datascience: aggregate_by_group, deduplicate_entities/relations, detect_drift, diff_entities/relations, extract_entities/relations_llm, hotness_score, melt, merge_graphs, pivot, build_entity/relation_schema_prompt. Finance: avellaneda_stoikov_quotes, generate_gbm_prices, generate_taker_order, hawkes_intensity + módulo finance.py. Cybersecurity: envelope_encrypt/decrypt + módulo cybersecurity.py. Pipelines: extraction_pipeline, monte_carlo_market, run_market_sim. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,77 @@
|
||||
"""diff_entities — compara dos snapshots de entities detectando cambios campo a campo."""
|
||||
|
||||
|
||||
def diff_entities(
|
||||
before: list[dict],
|
||||
after: list[dict],
|
||||
key: str = "id",
|
||||
ignore_fields: list[str] | None = None,
|
||||
compare_fields: list[str] | None = None,
|
||||
) -> dict:
|
||||
"""Compara dos snapshots de entities y devuelve diferencias campo a campo.
|
||||
|
||||
Detecta entities añadidas, eliminadas, modificadas e inalteradas.
|
||||
Ignora campos de metadata temporal por defecto (created_at, updated_at).
|
||||
|
||||
Args:
|
||||
before: Lista de entities del snapshot anterior.
|
||||
after: Lista de entities del snapshot posterior.
|
||||
key: Campo que identifica unicamente cada entity. Default "id".
|
||||
ignore_fields: Campos a excluir de la comparacion.
|
||||
Default ["created_at", "updated_at"].
|
||||
compare_fields: Si se da, solo compara estos campos (tiene prioridad
|
||||
sobre ignore_fields).
|
||||
|
||||
Returns:
|
||||
Dict con keys: added, removed, modified, unchanged, summary.
|
||||
modified contiene lista de {"key": str, "changes": {"field": {"old": ..., "new": ...}}}.
|
||||
"""
|
||||
if ignore_fields is None:
|
||||
ignore_fields = ["created_at", "updated_at"]
|
||||
|
||||
before_map = {str(e[key]): e for e in before if key in e}
|
||||
after_map = {str(e[key]): e for e in after if key in e}
|
||||
|
||||
before_keys = set(before_map.keys())
|
||||
after_keys = set(after_map.keys())
|
||||
|
||||
added = [after_map[k] for k in after_keys - before_keys]
|
||||
removed = [before_map[k] for k in before_keys - after_keys]
|
||||
|
||||
modified = []
|
||||
unchanged = 0
|
||||
|
||||
for k in before_keys & after_keys:
|
||||
b = before_map[k]
|
||||
a = after_map[k]
|
||||
|
||||
if compare_fields is not None:
|
||||
fields_to_check = compare_fields
|
||||
else:
|
||||
all_fields = set(b.keys()) | set(a.keys())
|
||||
fields_to_check = [f for f in all_fields if f not in ignore_fields and f != key]
|
||||
|
||||
changes = {}
|
||||
for field in fields_to_check:
|
||||
old_val = b.get(field)
|
||||
new_val = a.get(field)
|
||||
if old_val != new_val:
|
||||
changes[field] = {"old": old_val, "new": new_val}
|
||||
|
||||
if changes:
|
||||
modified.append({"key": k, "changes": changes})
|
||||
else:
|
||||
unchanged += 1
|
||||
|
||||
n_added = len(added)
|
||||
n_removed = len(removed)
|
||||
n_modified = len(modified)
|
||||
summary = f"{n_added} added, {n_removed} removed, {n_modified} modified, {unchanged} unchanged"
|
||||
|
||||
return {
|
||||
"added": added,
|
||||
"removed": removed,
|
||||
"modified": modified,
|
||||
"unchanged": unchanged,
|
||||
"summary": summary,
|
||||
}
|
||||
Reference in New Issue
Block a user