"""Sanity check de los manifests YAML de todos los enrichers. Confirma que el set actual cubre los tipos esperados y que cada manifest tiene los campos que `enrichers.cpp` necesita parsear (id, applies_to). """ from __future__ import annotations from pathlib import Path from conftest import ENRICHERS_DIR EXPECTED_IDS = { "extract_domain", "extract_links", "extract_text_entities", "fetch_webpage", "web_search", } def _parse_simple_yaml(text: str) -> dict: """Parser ad-hoc que replica lo que hace enrichers.cpp.""" out: dict = {} in_skip = False for raw in text.splitlines(): line = raw.rstrip("\r") s = line.strip() if not s or s.startswith("#"): continue indented = line and line[0].isspace() if not indented: in_skip = False if in_skip: continue if ":" not in s: continue key, _, val = s.partition(":") key = key.strip() val = val.strip() if val and val[0] in ('"', "'") and val[-1] == val[0]: val = val[1:-1] if key == "params" and not val: in_skip = True out[key] = val return out def test_all_expected_enrichers_present(): found = {p.name for p in ENRICHERS_DIR.iterdir() if p.is_dir()} missing = EXPECTED_IDS - found assert not missing, f"faltan enrichers: {missing}" def test_each_manifest_has_required_fields(): for d in ENRICHERS_DIR.iterdir(): if not d.is_dir(): continue manifest = d / "manifest.yaml" runpy = d / "run.py" assert manifest.exists(), f"falta manifest: {d.name}" assert runpy.exists(), f"falta run.py: {d.name}" m = _parse_simple_yaml(manifest.read_text(encoding="utf-8")) assert m.get("id") == d.name, f"id no coincide con dir: {d.name}" assert m.get("applies_to"), f"sin applies_to: {d.name}" assert m.get("description"), f"sin description: {d.name}" def test_web_search_applies_to_text(): m = _parse_simple_yaml( (ENRICHERS_DIR / "web_search" / "manifest.yaml").read_text()) assert "text" in m["applies_to"].lower()