837563c3ba
Datascience: aggregate_by_group, deduplicate_entities/relations, detect_drift, diff_entities/relations, extract_entities/relations_llm, hotness_score, melt, merge_graphs, pivot, build_entity/relation_schema_prompt. Finance: avellaneda_stoikov_quotes, generate_gbm_prices, generate_taker_order, hawkes_intensity + módulo finance.py. Cybersecurity: envelope_encrypt/decrypt + módulo cybersecurity.py. Pipelines: extraction_pipeline, monte_carlo_market, run_market_sim. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
121 lines
4.0 KiB
Python
121 lines
4.0 KiB
Python
"""Tests para merge_graphs."""
|
|
|
|
import sys
|
|
import os
|
|
|
|
sys.path.insert(0, os.path.dirname(__file__))
|
|
from merge_graphs import merge_graphs
|
|
|
|
|
|
def test_dos_grafos_con_entity_duplicada_merge():
    """Two graphs sharing an identically named entity yield one merged entity.

    The second graph's entity carries an extra ``country`` attribute that the
    first one lacks; the merged entity is expected to keep it.
    """
    g1 = {
        "entities": [{"id": "1", "name": "Alice Corp", "type": "company"}],
        "relations": [],
    }
    g2 = {
        "entities": [
            {"id": "2", "name": "Alice Corp", "type": "company", "country": "US"},
        ],
        "relations": [],
    }
    result = merge_graphs([g1, g2], similarity_threshold=0.95)

    # Identical names -> similarity 1.0 -> the two entities must be merged.
    assert len(result["entities"]) == 1
    assert len(result["merge_log"]) == 1

    merged = result["entities"][0]
    assert merged.get("name") == "Alice Corp"
    # The merge must preserve "country" even when the canonical entity did not
    # have it. This is checked on its own: OR-ing it with the name check (as
    # before) meant the attribute check was never actually enforced, because
    # both inputs share the same name.
    assert merged.get("country") == "US"
|
|
|
|
|
|
def test_entities_similares_pero_bajo_threshold_no_merge():
    """Entities whose names fall below the threshold are kept separate."""
    graphs = [
        {"entities": [{"id": "1", "name": "Alice"}], "relations": []},
        {"entities": [{"id": "2", "name": "Bob"}], "relations": []},
    ]

    result = merge_graphs(graphs, similarity_threshold=0.85)

    # "Alice" and "Bob" are very different -> nothing gets merged.
    entity_count = len(result["entities"])
    assert entity_count == 2
    merge_count = len(result["merge_log"])
    assert merge_count == 0
|
|
|
|
|
|
def test_relaciones_re_apuntadas_correctamente():
    """Relations that reference a merged-away entity are re-pointed.

    Entity "3" in the second graph duplicates entity "1" of the first graph,
    so after merging no relation may still reference id "3".
    """
    g1 = {
        "entities": [
            {"id": "1", "name": "Alice Corp"},
            {"id": "2", "name": "Bob"},
        ],
        "relations": [
            {"source_id": "2", "target_id": "1", "relation_type": "works_at"},
        ],
    }
    g2 = {
        "entities": [
            {"id": "3", "name": "Alice Corp"},  # duplicate of id=1
        ],
        "relations": [
            {"source_id": "3", "target_id": "2", "relation_type": "knows"},
        ],
    }
    result = merge_graphs([g1, g2], similarity_threshold=0.95)

    # Entity 3 is merged into 1 -> the relation with source_id=3 must point to
    # the canonical entity instead.
    assert len(result["entities"]) == 2  # Alice Corp + Bob

    # Guard against a vacuous pass: without this, a merge that returned no
    # relations at all would satisfy the loop below.
    assert result["relations"], "merge dropped every relation"

    # Relations must carry canonical ids only (never the merged-away "3").
    for rel in result["relations"]:
        assert rel["source_id"] != "3"
        assert rel["target_id"] != "3"
|
|
|
|
|
|
def test_merge_log_registra_cada_merge():
    """Each merge produces one log entry with the expected keys."""
    first = {"entities": [{"id": "1", "name": "OpenAI"}], "relations": []}
    second = {"entities": [{"id": "2", "name": "OpenAI"}], "relations": []}

    result = merge_graphs([first, second], similarity_threshold=0.9)

    merge_log = result["merge_log"]
    assert len(merge_log) == 1

    entry = merge_log[0]
    # Every log record must expose what was merged, into what, and how similar.
    for expected_key in ("merged", "into", "similarity"):
        assert expected_key in entry
    assert entry["similarity"] == 1.0
|
|
|
|
|
|
def test_tres_grafos_merge_transitivo():
    """Three graphs with the same-named entity collapse into a single entity."""
    # A~B and B~C -> A, B and C must all end up merged into one.
    graphs = [
        {"entities": [{"id": entity_id, "name": "Acme Corp"}], "relations": []}
        for entity_id in ("1", "2", "3")
    ]

    result = merge_graphs(graphs, similarity_threshold=0.9)

    assert len(result["entities"]) == 1
|
|
|
|
|
|
def test_grafos_sin_overlap_concatenacion_simple():
    """Graphs with no similar entities are simply concatenated."""
    left_entities = [{"id": "1", "name": "Alice"}, {"id": "2", "name": "Bob"}]
    left_relations = [
        {"source_id": "1", "target_id": "2", "relation_type": "knows"},
    ]
    right_entities = [{"id": "3", "name": "Carol"}, {"id": "4", "name": "Dave"}]
    right_relations = [
        {"source_id": "3", "target_id": "4", "relation_type": "knows"},
    ]
    left = {"entities": left_entities, "relations": left_relations}
    right = {"entities": right_entities, "relations": right_relations}

    result = merge_graphs([left, right], similarity_threshold=0.85)

    # No similar entities -> straight concatenation, nothing logged.
    assert len(result["entities"]) == 4
    assert len(result["relations"]) == 2
    assert len(result["merge_log"]) == 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Allow running the tests directly as a script, in declaration order.
    for run_test in (
        test_dos_grafos_con_entity_duplicada_merge,
        test_entities_similares_pero_bajo_threshold_no_merge,
        test_relaciones_re_apuntadas_correctamente,
        test_merge_log_registra_cada_merge,
        test_tres_grafos_merge_transitivo,
        test_grafos_sin_overlap_concatenacion_simple,
    ):
        run_test()
    print("All tests passed.")
|