# graph_explorer/tests/test_manifests.py

"""Sanity check de los manifests YAML de todos los enrichers.

Confirma que el set actual cubre los tipos esperados y que cada manifest
tiene los campos que `enrichers.cpp` necesita parsear (id, applies_to).
"""
from __future__ import annotations

from pathlib import Path

from conftest import ENRICHERS_DIR


# Enricher ids that must be present as sub-directories of ENRICHERS_DIR.
# Only missing entries are reported by the presence test, so additional
# enrichers beyond this set are tolerated.
EXPECTED_IDS = {
    "extract_domain",
    "extract_links",
    "extract_text_entities",
    "fetch_webpage",
    "web_search",
}


def _parse_simple_yaml(text: str) -> dict:
    """Ad-hoc parser that mirrors what enrichers.cpp does.

    Reads ``key: value`` lines into a flat dict of strings:

    * blank lines and lines whose first non-blank character is ``#`` are
      ignored, as are lines that contain no ``:``;
    * each remaining line is split at its first ``:``; key and value are
      whitespace-stripped, and one pair of matching surrounding quotes
      (single or double) is removed from the value;
    * a ``params`` key with an empty value opens a region in which every
      indented line is skipped; the region ends at the next line that
      starts at column 0;
    * any other indented ``key: value`` line is stored in the same flat
      dict, and a repeated key overwrites the earlier value.
    """
    parsed: dict = {}
    skipping_params = False
    for raw_line in text.splitlines():
        current = raw_line.rstrip("\r")
        stripped = current.strip()
        if stripped == "" or stripped[0] == "#":
            continue
        # A line that starts at column 0 closes any open `params:` region.
        if not current[0].isspace():
            skipping_params = False
        if skipping_params or ":" not in stripped:
            continue
        name, _sep, value = stripped.partition(":")
        name, value = name.strip(), value.strip()
        # Drop one layer of matching quotes around the value, if present.
        if value[:1] in ('"', "'") and value.endswith(value[0]):
            value = value[1:-1]
        if name == "params" and value == "":
            skipping_params = True
        parsed[name] = value
    return parsed


def test_all_expected_enrichers_present():
    """Every id in EXPECTED_IDS must exist as a directory in ENRICHERS_DIR."""
    present = set()
    for entry in ENRICHERS_DIR.iterdir():
        if entry.is_dir():
            present.add(entry.name)
    missing = EXPECTED_IDS.difference(present)
    assert not missing, f"faltan enrichers: {missing}"


def test_each_manifest_has_required_fields():
    """Check the layout and manifest fields of every enricher directory.

    Each sub-directory of ENRICHERS_DIR must contain ``manifest.yaml`` and
    ``run.py``; the manifest's ``id`` must equal the directory name, and
    ``applies_to`` and ``description`` must be non-empty.
    """
    enricher_dirs = (entry for entry in ENRICHERS_DIR.iterdir() if entry.is_dir())
    for enricher in enricher_dirs:
        name = enricher.name
        manifest_path = enricher / "manifest.yaml"
        script_path = enricher / "run.py"
        assert manifest_path.exists(), f"falta manifest: {name}"
        assert script_path.exists(), f"falta run.py:  {name}"
        fields = _parse_simple_yaml(manifest_path.read_text(encoding="utf-8"))
        assert fields.get("id") == name, f"id no coincide con dir: {name}"
        assert fields.get("applies_to"), f"sin applies_to: {name}"
        assert fields.get("description"), f"sin description: {name}"


def test_web_search_applies_to_text():
    """The web_search manifest must declare that it applies to text.

    The manifest is read explicitly as UTF-8, as the required-fields test
    does, so the result does not depend on the platform's locale encoding.
    """
    manifest_path = ENRICHERS_DIR / "web_search" / "manifest.yaml"
    m = _parse_simple_yaml(manifest_path.read_text(encoding="utf-8"))
    assert "text" in m["applies_to"].lower()