"""Tests for the fetch_webpage enricher, with the network mocked via a requests stub."""

from __future__ import annotations

import os
from pathlib import Path

from conftest import (
    base_ctx,
    get_entity,
    list_entities,
    list_relations,
    make_node,
    run_enricher,
    stub_requests,
)

# Body served by the stubbed response in the success test.  It must carry a
# <title> of "Acme Demo": the test asserts both the extracted title and that
# the cached copy of the page contains that text.
# NOTE(review): the markup around the two text lines is a minimal
# reconstruction; confirm the link target against what the enricher expects
# (the test requires that only one entity, the Domain, is added).
SAMPLE_HTML = """
<html>
<head><title>Acme Demo</title></head>
<body>
<p>Esta es la pagina de prueba con <a href="https://www.acme.example/about">un enlace</a>.</p>
<p>Email de contacto: ops@acme.example</p>
</body>
</html>
"""


def test_fetch_webpage_creates_domain_and_caches(ops_db, app_dir, registry_root, tmp_path):
    """A 200 response promotes the Url node to Webpage, caches the HTML and links a Domain."""
    url = "https://www.acme.example/"
    make_node(ops_db, node_id="u1", name="acme", type_ref="Url", metadata={"url": url})

    # Every stubbed request gets the sample page back with a 200 status.
    plan = {
        "default": {
            "text": SAMPLE_HTML,
            "status": 200,
            "headers": {"Content-Type": "text/html; charset=utf-8"},
        },
    }
    env = stub_requests(tmp_path, plan)
    ctx = base_ctx(
        ops_db=ops_db,
        app_dir=app_dir,
        registry_root=registry_root,
        node_id="u1",
        node_name="acme",
        node_type="Url",
        metadata={"url": url},
    )

    rc, out, err = run_enricher("fetch_webpage", ctx, env=env)

    assert rc == 0, f"stderr={err}"
    assert out is not None, err
    assert out["status_code"] == 200
    assert out["title"] == "Acme Demo"
    assert out["entities_added"] == 1  # Domain
    assert out["relations_added"] == 1  # BELONGS_TO

    # The Url node is promoted to Webpage.
    entity = get_entity(ops_db, "u1")
    assert entity["type_ref"] == "Webpage", entity
    assert entity["metadata"]["title"] == "Acme Demo"
    assert entity["metadata"]["status_code"] == 200

    # The cached copy exists; html_path is resolved relative to app_dir.
    html_path = Path(app_dir) / entity["metadata"]["html_path"]
    assert html_path.exists()
    assert "Acme Demo" in html_path.read_text(encoding="utf-8")

    # A Domain entity was created and linked through a BELONGS_TO relation.
    domains = list_entities(ops_db, type_ref="Domain")
    assert any(d["name"] == "www.acme.example" for d in domains)
    rels = list_relations(ops_db, name="BELONGS_TO")
    assert len(rels) == 1


def test_fetch_webpage_handles_http_error(ops_db, app_dir, registry_root, tmp_path):
    """A 404 response is reported in the summary instead of failing the enricher."""
    url = "https://no.example/"
    make_node(ops_db, node_id="u1", name="bad", type_ref="Url", metadata={"url": url})

    plan = {"default": {"text": "", "status": 404}}
    env = stub_requests(tmp_path, plan)
    ctx = base_ctx(
        ops_db=ops_db,
        app_dir=app_dir,
        registry_root=registry_root,
        node_id="u1",
        node_name="bad",
        node_type="Url",
        metadata={"url": url},
    )

    rc, out, err = run_enricher("fetch_webpage", ctx, env=env)

    # A 404 is a valid response: exit code 0 with status_code in the summary.
    assert rc == 0, err
    # Guard before subscripting so a missing summary fails with the enricher's
    # stderr rather than an opaque TypeError (same check as the success test).
    assert out is not None, err
    assert out["status_code"] == 404