Files
graph_explorer/tests/test_manifests.py
T
egutierrez 6919ebfe9c feat(enrichers): web_search DuckDuckGo + tests pytest de los 5 enrichers
Añade enricher web_search aplicable a nodos text/Concept/Topic. Hace
POST a html.duckduckgo.com con la query del nodo, parsea resultados
con HTMLParser stdlib, decodifica el redirect uddg= y crea N nodos
Url con relación SEARCH_RESULT_OF apuntando al nodo origen.

Encadenable: tras web_search, fetch_webpage sobre cada Url completa
el pipeline search -> fetch -> extract.

Defensa contra ops_db_path mal resuelto: normaliza backslashes,
resuelve relativo contra app_dir, valida que la tabla entities
exista antes de tocar nada (exit codes 7/8/9 con JSON resumen).

Tests pytest (16/16 verde): conftest con operations.db temp +
schema mínimo, stub de requests vía PYTHONPATH para mockear red.
Cubre los 5 enrichers (extract_domain, fetch_webpage, extract_links,
extract_text_entities, web_search) + sanity check de manifests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 16:10:13 +02:00

73 lines
2.1 KiB
Python

"""Sanity check de los manifests YAML de todos los enrichers.
Confirma que el set actual cubre los tipos esperados y que cada manifest
tiene los campos que `enrichers.cpp` necesita parsear (id, applies_to).
"""
from __future__ import annotations
from pathlib import Path
from conftest import ENRICHERS_DIR
# Enricher ids the suite requires; each must exist as a directory name
# under ENRICHERS_DIR (see test_all_expected_enrichers_present).
EXPECTED_IDS = {
    "extract_domain",
    "extract_links",
    "extract_text_entities",
    "fetch_webpage",
    "web_search",
}
def _parse_simple_yaml(text: str) -> dict:
"""Parser ad-hoc que replica lo que hace enrichers.cpp."""
out: dict = {}
in_skip = False
for raw in text.splitlines():
line = raw.rstrip("\r")
s = line.strip()
if not s or s.startswith("#"):
continue
indented = line and line[0].isspace()
if not indented:
in_skip = False
if in_skip:
continue
if ":" not in s:
continue
key, _, val = s.partition(":")
key = key.strip()
val = val.strip()
if val and val[0] in ('"', "'") and val[-1] == val[0]:
val = val[1:-1]
if key == "params" and not val:
in_skip = True
out[key] = val
return out
def test_all_expected_enrichers_present():
    """Every id in EXPECTED_IDS must exist as a directory in ENRICHERS_DIR.

    Extra directories are allowed; only missing ones fail the test.
    """
    present = set()
    for entry in ENRICHERS_DIR.iterdir():
        if entry.is_dir():
            present.add(entry.name)
    missing = EXPECTED_IDS.difference(present)
    assert not missing, f"faltan enrichers: {missing}"
def test_each_manifest_has_required_fields():
    """Validate the layout and manifest of every enricher directory.

    For each subdirectory of ENRICHERS_DIR this checks that both
    ``manifest.yaml`` and ``run.py`` exist, that the manifest ``id``
    equals the directory name, and that ``applies_to`` and
    ``description`` are present and non-empty.
    """
    # Lazy filter: plain files sitting next to the enricher folders are
    # ignored, and directories are checked in iteration order.
    enricher_dirs = (entry for entry in ENRICHERS_DIR.iterdir() if entry.is_dir())
    for enricher in enricher_dirs:
        name = enricher.name
        manifest_path = enricher / "manifest.yaml"
        script_path = enricher / "run.py"
        assert manifest_path.exists(), f"falta manifest: {name}"
        assert script_path.exists(), f"falta run.py: {name}"
        fields = _parse_simple_yaml(manifest_path.read_text(encoding="utf-8"))
        assert fields.get("id") == name, f"id no coincide con dir: {name}"
        assert fields.get("applies_to"), f"sin applies_to: {name}"
        assert fields.get("description"), f"sin description: {name}"
def test_web_search_applies_to_text():
    """Check that the ``web_search`` manifest lists ``text`` in ``applies_to``.

    The manifest is decoded explicitly as UTF-8, the same way
    ``test_each_manifest_has_required_fields`` reads manifests, so the
    outcome does not depend on the platform's default locale encoding.
    The check is a case-insensitive substring match on the raw
    ``applies_to`` value.
    """
    manifest = ENRICHERS_DIR / "web_search" / "manifest.yaml"
    m = _parse_simple_yaml(manifest.read_text(encoding="utf-8"))
    assert "text" in m["applies_to"].lower()