"""Tests del enricher web_search (DuckDuckGo HTML).""" from __future__ import annotations from pathlib import Path from conftest import ( base_ctx, list_entities, list_relations, make_node, run_enricher, stub_requests, TESTS_DIR, ) DDG_FIXTURE = TESTS_DIR / "fixtures" / "ddg_results.html" def test_web_search_creates_url_results_for_text_node( ops_db, app_dir, registry_root, tmp_path): make_node(ops_db, node_id="t1", name="tomate", type_ref="text", metadata={}) plan = { "match": [ {"contains": "duckduckgo.com", "text": DDG_FIXTURE.read_text(encoding="utf-8"), "status": 200}, ], "default": {"text": "", "status": 404}, } env = stub_requests(tmp_path, plan) ctx = base_ctx(ops_db=ops_db, app_dir=app_dir, registry_root=registry_root, node_id="t1", node_name="tomate", node_type="text", params={"limit": 5}) rc, out, err = run_enricher("web_search", ctx, env=env) assert rc == 0, f"stderr={err}" assert out is not None, err assert out["engine"] == "duckduckgo" assert out["results"] == 3, out assert out["entities_added"] == 3 assert out["relations_added"] == 3 urls = list_entities(ops_db, type_ref="Url") targets = {e["metadata"].get("url") for e in urls} assert "https://es.wikipedia.org/wiki/Tomate" in targets assert "https://www.botanical-online.com/alimentos/tomate-propiedades" in targets rels = list_relations(ops_db, name="SEARCH_RESULT_OF") assert len(rels) == 3 assert all(r["to_entity"] == "t1" for r in rels) # Metadata enriquecida. wiki = next(e for e in urls if e["metadata"].get("url") == "https://es.wikipedia.org/wiki/Tomate") assert wiki["metadata"]["query"] == "tomate" assert wiki["metadata"]["rank"] == 1 assert "Wikipedia" in wiki["metadata"]["title"] def test_web_search_uses_metadata_query_over_name(ops_db, app_dir, registry_root, tmp_path): """metadata.query debe ganar prioridad sobre node_name.""" make_node(ops_db, node_id="t1", name="placeholder", type_ref="text", metadata={"query": "tomate"}) plan = {"match": [{"contains": "duckduckgo.com", "text": DDG_FIXTURE.read_text(encoding="utf-8")}]} env = stub_requests(tmp_path, plan) ctx = base_ctx(ops_db=ops_db, app_dir=app_dir, registry_root=registry_root, node_id="t1", node_name="placeholder", node_type="text", metadata={"query": "tomate"}) rc, out, err = run_enricher("web_search", ctx, env=env) assert rc == 0, err assert out["query"] == "tomate" def test_web_search_limit_truncates_results(ops_db, app_dir, registry_root, tmp_path): make_node(ops_db, node_id="t1", name="tomate", type_ref="text") plan = {"match": [{"contains": "duckduckgo.com", "text": DDG_FIXTURE.read_text(encoding="utf-8")}]} env = stub_requests(tmp_path, plan) ctx = base_ctx(ops_db=ops_db, app_dir=app_dir, registry_root=registry_root, node_id="t1", node_name="tomate", node_type="text", params={"limit": 1}) rc, out, err = run_enricher("web_search", ctx, env=env) assert rc == 0, err assert out["results"] == 1 assert out["entities_added"] == 1 def test_web_search_no_query_fails_clean(ops_db, app_dir, registry_root, tmp_path): make_node(ops_db, node_id="t1", name="", type_ref="text", metadata={}) env = stub_requests(tmp_path, {"default": {"text": "", "status": 200}}) ctx = base_ctx(ops_db=ops_db, app_dir=app_dir, registry_root=registry_root, node_id="t1", node_name="", node_type="text") rc, out, err = run_enricher("web_search", ctx, env=env) assert rc == 2 assert "sin query" in err