# graph_explorer/tests/test_extract_domain.py

"""Tests del enricher extract_domain.

Pure regex/parsing — sin red. Verifica:
  - Url con metadata.url crea Domain + BELONGS_TO
  - Email crea Domain (desde la parte derecha del @)
  - Si el Domain ya existe se reusa, no se duplica
"""
from __future__ import annotations

from conftest import (
    base_ctx, get_entity, list_entities, list_relations,
    make_node, run_enricher,
)


def test_url_creates_domain_and_relation(ops_db, app_dir, registry_root):
    """A Url node carrying ``metadata.url`` yields a Domain and a BELONGS_TO.

    Seeds a Url node, runs the ``extract_domain`` enricher on it and checks
    that a Domain named ``www.example.com`` is listed and that the only
    BELONGS_TO relation starts at the Url node ``u1``.
    """
    target_url = "https://www.example.com/path"
    env = dict(ops_db=ops_db, app_dir=app_dir, registry_root=registry_root)

    # Separate dict literals on purpose: the node and the context must not
    # share one metadata object.
    make_node(ops_db, node_id="u1", name="ex", type_ref="Url",
              metadata={"url": target_url})
    ctx = base_ctx(**env, node_id="u1", node_name="ex", node_type="Url",
                   metadata={"url": target_url})

    exit_code, result, stderr = run_enricher("extract_domain", ctx)
    assert exit_code == 0, stderr
    assert result, result
    assert result.get("entities_added", 0) >= 1, result

    domains = list_entities(ops_db, type_ref="Domain")
    assert any(row["name"] == "www.example.com" for row in domains), domains

    belongs_to = list_relations(ops_db, name="BELONGS_TO")
    assert len(belongs_to) == 1
    first_relation = belongs_to[0]
    assert first_relation["from_entity"] == "u1"


def test_email_creates_domain(ops_db, app_dir, registry_root):
    """An Email node yields a Domain named after the part right of the ``@``.

    Seeds an Email node for ``user@aurgi.com``, runs the ``extract_domain``
    enricher (the context is built without metadata) and checks that a Domain
    named ``aurgi.com`` is listed.
    """
    address = "user@aurgi.com"
    env = dict(ops_db=ops_db, app_dir=app_dir, registry_root=registry_root)

    make_node(ops_db, node_id="e1", name=address, type_ref="Email",
              metadata={"address": address})
    ctx = base_ctx(**env, node_id="e1", node_name=address, node_type="Email")

    exit_code, _result, stderr = run_enricher("extract_domain", ctx)
    assert exit_code == 0, stderr

    domains = list_entities(ops_db, type_ref="Domain")
    assert any(row["name"] == "aurgi.com" for row in domains), domains


def test_existing_domain_is_reused(ops_db, app_dir, registry_root):
    """A Domain that already exists is not duplicated by the enricher.

    Seeds a Domain named ``example.com`` and a Url node pointing at the same
    host, runs ``extract_domain`` and checks that exactly one Domain with that
    name is listed afterwards.
    """
    target_url = "https://example.com/x"
    env = dict(ops_db=ops_db, app_dir=app_dir, registry_root=registry_root)

    # Pre-create a Domain with the same name the enricher would produce.
    make_node(ops_db, node_id="d1", name="example.com", type_ref="Domain",
              metadata={})
    make_node(ops_db, node_id="u1", name="ex", type_ref="Url",
              metadata={"url": target_url})
    ctx = base_ctx(**env, node_id="u1", node_name="ex", node_type="Url",
                   metadata={"url": target_url})

    exit_code, _result, stderr = run_enricher("extract_domain", ctx)
    assert exit_code == 0, stderr

    domains = list_entities(ops_db, type_ref="Domain")
    matches = sum(1 for row in domains if row["name"] == "example.com")
    assert matches == 1, domains