"""Tests para merge_graphs.""" import sys import os sys.path.insert(0, os.path.dirname(__file__)) from merge_graphs import merge_graphs def test_dos_grafos_con_entity_duplicada_merge(): g1 = { "entities": [{"id": "1", "name": "Alice Corp", "type": "company"}], "relations": [], } g2 = { "entities": [{"id": "2", "name": "Alice Corp", "type": "company", "country": "US"}], "relations": [], } result = merge_graphs([g1, g2], similarity_threshold=0.95) # Nombres identicos -> similitud 1.0 -> deben mergearse assert len(result["entities"]) == 1 assert len(result["merge_log"]) == 1 merged = result["entities"][0] # El merge debe preservar "country" aunque el canonical no lo tuviera assert merged.get("country") == "US" or merged.get("name") == "Alice Corp" def test_entities_similares_pero_bajo_threshold_no_merge(): g1 = { "entities": [{"id": "1", "name": "Alice"}], "relations": [], } g2 = { "entities": [{"id": "2", "name": "Bob"}], "relations": [], } result = merge_graphs([g1, g2], similarity_threshold=0.85) # Alice y Bob son muy distintos -> no merge assert len(result["entities"]) == 2 assert len(result["merge_log"]) == 0 def test_relaciones_re_apuntadas_correctamente(): g1 = { "entities": [ {"id": "1", "name": "Alice Corp"}, {"id": "2", "name": "Bob"}, ], "relations": [ {"source_id": "2", "target_id": "1", "relation_type": "works_at"}, ], } g2 = { "entities": [ {"id": "3", "name": "Alice Corp"}, # duplicada de id=1 ], "relations": [ {"source_id": "3", "target_id": "2", "relation_type": "knows"}, ], } result = merge_graphs([g1, g2], similarity_threshold=0.95) # Entity 3 mergeada en 1 -> relacion source_id=3 debe apuntar al canonical de 1 assert len(result["entities"]) == 2 # Alice Corp + Bob # Verificar que las relaciones tienen IDs canonicos (no "3") for rel in result["relations"]: assert rel["source_id"] != "3" assert rel["target_id"] != "3" def test_merge_log_registra_cada_merge(): g1 = { "entities": [{"id": "1", "name": "OpenAI"}], "relations": [], } g2 = { "entities": [{"id": "2", "name": "OpenAI"}], "relations": [], } result = merge_graphs([g1, g2], similarity_threshold=0.9) assert len(result["merge_log"]) == 1 log = result["merge_log"][0] assert "merged" in log assert "into" in log assert "similarity" in log assert log["similarity"] == 1.0 def test_tres_grafos_merge_transitivo(): # A~B y B~C -> A, B, C deben mergearse en uno g1 = {"entities": [{"id": "1", "name": "Acme Corp"}], "relations": []} g2 = {"entities": [{"id": "2", "name": "Acme Corp"}], "relations": []} g3 = {"entities": [{"id": "3", "name": "Acme Corp"}], "relations": []} result = merge_graphs([g1, g2, g3], similarity_threshold=0.9) assert len(result["entities"]) == 1 def test_grafos_sin_overlap_concatenacion_simple(): g1 = { "entities": [{"id": "1", "name": "Alice"}, {"id": "2", "name": "Bob"}], "relations": [{"source_id": "1", "target_id": "2", "relation_type": "knows"}], } g2 = { "entities": [{"id": "3", "name": "Carol"}, {"id": "4", "name": "Dave"}], "relations": [{"source_id": "3", "target_id": "4", "relation_type": "knows"}], } result = merge_graphs([g1, g2], similarity_threshold=0.85) # Ninguna entity similar -> concatenacion directa assert len(result["entities"]) == 4 assert len(result["relations"]) == 2 assert len(result["merge_log"]) == 0 if __name__ == "__main__": test_dos_grafos_con_entity_duplicada_merge() test_entities_similares_pero_bajo_threshold_no_merge() test_relaciones_re_apuntadas_correctamente() test_merge_log_registra_cada_merge() test_tres_grafos_merge_transitivo() test_grafos_sin_overlap_concatenacion_simple() print("All tests passed.")