Files
fn_registry/python/functions/datascience/merge_graphs_test.py
egutierrez 63a9cb5273 feat: funciones Python datascience, finance, cybersecurity y pipelines
Datascience: aggregate_by_group, deduplicate_entities/relations, detect_drift,
diff_entities/relations, extract_entities/relations_llm, hotness_score, melt,
merge_graphs, pivot, build_entity/relation_schema_prompt.
Finance: avellaneda_stoikov_quotes, generate_gbm_prices, generate_taker_order,
hawkes_intensity + módulo finance.py.
Cybersecurity: envelope_encrypt/decrypt + módulo cybersecurity.py.
Pipelines: extraction_pipeline, monte_carlo_market, run_market_sim.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 17:11:32 +02:00

121 lines
4.0 KiB
Python

"""Tests para merge_graphs."""
import sys
import os
# Put this file's own directory at the front of sys.path so the
# `merge_graphs` import below is looked up there first.
sys.path.insert(0, os.path.dirname(__file__))
from merge_graphs import merge_graphs
def test_dos_grafos_con_entity_duplicada_merge():
    """Two graphs holding an identically named entity yield one merged entity.

    Both entities are named "Alice Corp"; the test expects that to count as
    similarity 1.0, above the 0.95 threshold. Only the second graph's entity
    carries a "country" attribute, and the merged entity is expected to
    keep it.
    """
    g1 = {
        "entities": [{"id": "1", "name": "Alice Corp", "type": "company"}],
        "relations": [],
    }
    g2 = {
        "entities": [
            {"id": "2", "name": "Alice Corp", "type": "company", "country": "US"},
        ],
        "relations": [],
    }
    result = merge_graphs([g1, g2], similarity_threshold=0.95)
    # Identical names -> similarity 1.0 -> the two entities must be merged.
    assert len(result["entities"]) == 1
    assert len(result["merge_log"]) == 1
    merged = result["entities"][0]
    assert merged.get("name") == "Alice Corp"
    # The merge must keep "country" even when the canonical entity lacked it.
    # Name and country are asserted separately on purpose: joined with `or`,
    # the name check alone would let a merge that drops "country" pass.
    assert merged.get("country") == "US"
def test_entities_similares_pero_bajo_threshold_no_merge():
    """Entities whose names differ clearly are left unmerged."""
    graphs = [
        {"entities": [{"id": "1", "name": "Alice"}], "relations": []},
        {"entities": [{"id": "2", "name": "Bob"}], "relations": []},
    ]
    outcome = merge_graphs(graphs, similarity_threshold=0.85)

    # "Alice" and "Bob" are very different, so no merge is expected:
    # both entities survive and the merge log stays empty.
    surviving = outcome["entities"]
    assert len(surviving) == 2
    log_entries = outcome["merge_log"]
    assert len(log_entries) == 0
def test_relaciones_re_apuntadas_correctamente():
    """Relations that referenced a merged-away entity no longer use its id.

    Graph 2 contains entity "3", a duplicate (same name) of entity "1" from
    graph 1, plus a relation whose source is "3". After merging, no relation
    may still reference "3" as either endpoint.
    """
    g1 = {
        "entities": [
            {"id": "1", "name": "Alice Corp"},
            {"id": "2", "name": "Bob"},
        ],
        "relations": [
            {"source_id": "2", "target_id": "1", "relation_type": "works_at"},
        ],
    }
    g2 = {
        "entities": [
            {"id": "3", "name": "Alice Corp"},  # duplicate of id=1
        ],
        "relations": [
            {"source_id": "3", "target_id": "2", "relation_type": "knows"},
        ],
    }
    result = merge_graphs([g1, g2], similarity_threshold=0.95)
    # Entity 3 is merged into 1 -> the relation with source_id=3 must be
    # re-pointed to the canonical entity.
    assert len(result["entities"]) == 2  # Alice Corp + Bob
    relations = result["relations"]
    # Guard against a vacuous pass: if the merge dropped every relation, the
    # loop below would never run and the test would succeed without checking
    # anything.
    assert len(relations) > 0
    # Check that the relations carry canonical ids (never "3").
    for rel in relations:
        assert rel["source_id"] != "3"
        assert rel["target_id"] != "3"
def test_merge_log_registra_cada_merge():
    """A single merge produces one log entry with the expected fields."""
    first = {"entities": [{"id": "1", "name": "OpenAI"}], "relations": []}
    second = {"entities": [{"id": "2", "name": "OpenAI"}], "relations": []}

    outcome = merge_graphs([first, second], similarity_threshold=0.9)

    merge_log = outcome["merge_log"]
    assert len(merge_log) == 1
    entry = merge_log[0]
    # Each log entry must expose these three keys.
    for field in ("merged", "into", "similarity"):
        assert field in entry
    # The two names are identical, so the recorded similarity must be 1.0.
    assert entry["similarity"] == 1.0
def test_tres_grafos_merge_transitivo():
    """Three graphs with the same entity name collapse into one entity."""
    # A~B and B~C -> A, B and C must all end up merged into one entity.
    graphs = [
        {"entities": [{"id": entity_id, "name": "Acme Corp"}], "relations": []}
        for entity_id in ("1", "2", "3")
    ]
    outcome = merge_graphs(graphs, similarity_threshold=0.9)
    merged_entities = outcome["entities"]
    assert len(merged_entities) == 1
def test_grafos_sin_overlap_concatenacion_simple():
    """Graphs with no similar entities are simply concatenated."""
    left = {
        "entities": [
            {"id": "1", "name": "Alice"},
            {"id": "2", "name": "Bob"},
        ],
        "relations": [
            {"source_id": "1", "target_id": "2", "relation_type": "knows"},
        ],
    }
    right = {
        "entities": [
            {"id": "3", "name": "Carol"},
            {"id": "4", "name": "Dave"},
        ],
        "relations": [
            {"source_id": "3", "target_id": "4", "relation_type": "knows"},
        ],
    }

    outcome = merge_graphs([left, right], similarity_threshold=0.85)

    # No entity is similar to another -> plain concatenation: every entity
    # and relation is kept and nothing is written to the merge log.
    all_entities = outcome["entities"]
    assert len(all_entities) == 4
    all_relations = outcome["relations"]
    assert len(all_relations) == 2
    log_entries = outcome["merge_log"]
    assert len(log_entries) == 0
if __name__ == "__main__":
    # Run every test in declaration order when the file is executed directly;
    # the first failing assertion aborts the run before the success message.
    for run_test in (
        test_dos_grafos_con_entity_duplicada_merge,
        test_entities_similares_pero_bajo_threshold_no_merge,
        test_relaciones_re_apuntadas_correctamente,
        test_merge_log_registra_cada_merge,
        test_tres_grafos_merge_transitivo,
        test_grafos_sin_overlap_concatenacion_simple,
    ):
        run_test()
    print("All tests passed.")