7a94160fd2
Bloque de cambios revisados y validados con el usuario en sesiones previas que no habian aterrizado en commits propios. Lista por tema: * enrichers: web_search ahora usa lite.duckduckgo.com como endpoint primario (mas tolerante con bot detection desde IP residencial), con fallback al endpoint html. Detecta pagina captcha y emite error claro si ambos fallan. Anyade _DDGLiteParser para el formato lite + auto-pick de parser por contenido. * enrichers: tipo Webpage unificado en Url (campos de cuerpo cacheado viven en metadata del Url). Manifests actualizados (applies_to: [Url]). fetch_webpage ya no convierte Url->Webpage. * enrichers/manifest: campo `params` parseado a EnricherSpec.params (name, type, default_value, description). UI puede renderizar dialog de configuracion. * jobs: fix de path conversion para Python embebido nativo Windows (no convertir a /mnt/c/... cuando el subproceso es Windows-native; solo cuando es bash o python via WSL). * main.cpp: ventana ImGui (no modal) "Run enricher" con layout 2-col (label izq, input der). Inserta job con JSON tipado. Layout clustering apretado: hijos del mismo anchor en un solo anillo alrededor del padre, sin desperdigar por anillos crecientes. * views: inspector con layout 2-col via BeginTable (Identity, Schema fields, Extras). Description full-width debajo de su label. * tests: portable conftest (auto-detecta REGISTRY_ROOT, PYTHON_BIN, ENRICHERS_DIR para WSL y Windows portable). _runner.py trampoline inyecta stub via sys.path porque embedded Python ignora PYTHONPATH. Tests bash-only (vendor_script, freeze, dispatcher bash, resolver Linux-binary) skipean en Windows. Tests existentes adaptados a Webpage->Url. Resultado actual: 32 passed WSL, 21 passed + 11 skipped Windows.
146 lines
5.2 KiB
Python
146 lines
5.2 KiB
Python
"""Tests del script tools/vendor_enricher_python.sh (issue 0033b).
|
|
|
|
Verifica:
|
|
- manifest sin uses_functions Python -> no crea _vendored/.
|
|
- manifest con un uses_functions -> copia el .py + __init__.
|
|
- dep transitiva (extract_iocs importa siblings) -> copia siblings.
|
|
- .vendor.lock con SHA256 + path origen.
|
|
- Idempotencia: 2da llamada con mismo estado no rehace nada.
|
|
- Cambio en el manifest invalida el lock.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from conftest import APP_DIR_SRC, REGISTRY_ROOT
|
|
|
|
|
|
# Path of the vendoring script, resolved relative to APP_DIR_SRC.
SCRIPT = APP_DIR_SRC / "tools" / "vendor_enricher_python.sh"

# The vendor script is bash-only and lives in the dev repo. It is not
# shipped in the Windows portable folder, and it would also need a real
# bash to run. The whole suite runs only when both of these hold:
#   - `bash` is found on PATH (it is not on Windows), and
#   - the script exists (it does not in a portable deploy without tools/).
pytestmark = pytest.mark.skipif(
    not (shutil.which("bash") is not None and SCRIPT.exists()),
    reason=(
        "bash o tools/vendor_enricher_python.sh no disponible "
        "(esperado en deploy portable)"
    ),
)
|
|
|
|
|
|
def _make_enricher_dir(tmp_path: Path, manifest: str) -> Path:
    """Create a minimal enricher directory under *tmp_path* and return it.

    The directory is named ``test_enricher`` and contains ``manifest.yaml``
    holding *manifest* verbatim plus a one-line stub ``run.py``.

    Raises:
        FileExistsError: if ``test_enricher`` already exists in *tmp_path*.
    """
    enricher_dir = tmp_path / "test_enricher"
    enricher_dir.mkdir()
    # Insertion order is preserved, so the manifest is written first.
    files = {"manifest.yaml": manifest, "run.py": "# stub\n"}
    for filename, content in files.items():
        (enricher_dir / filename).write_text(content, encoding="utf-8")
    return enricher_dir
|
|
|
|
|
|
def _run_vendor(enr_dir: Path) -> subprocess.CompletedProcess:
    """Run the vendor script against *enr_dir* and return the finished process.

    The script is invoked through ``bash`` with the enricher directory and
    ``REGISTRY_ROOT`` as its two arguments. stdout/stderr are captured as
    text and the call is limited to 20 seconds.

    Raises:
        subprocess.TimeoutExpired: if the script does not finish in time.
    """
    command = ["bash", str(SCRIPT), str(enr_dir), str(REGISTRY_ROOT)]
    return subprocess.run(command, capture_output=True, text=True, timeout=20)
|
|
|
|
|
|
def test_no_uses_functions_does_not_create_vendored(tmp_path):
    """A manifest without ``uses_functions`` must leave no vendoring artifacts."""
    manifest = "id: x\nname: x\napplies_to: [text]\n"
    enricher_dir = _make_enricher_dir(tmp_path, manifest)

    result = _run_vendor(enricher_dir)

    assert result.returncode == 0, result.stderr
    # Neither the vendored package nor the lock file may appear.
    for artifact in ("_vendored", ".vendor.lock"):
        assert not (enricher_dir / artifact).exists()
|
|
|
|
|
|
def test_single_dep_creates_vendored_layout(tmp_path):
    """A single ``uses_functions`` entry produces the vendored package layout.

    Expects ``_vendored/__init__.py``, the ``cybersecurity`` sub-package with
    its ``__init__.py`` and ``cybersecurity.py``, and a ``.vendor.lock`` that
    mentions the requested function id.
    """
    enr = _make_enricher_dir(tmp_path,
                             "id: x\nname: x\napplies_to: [Url]\n"
                             "uses_functions:\n"
                             " - normalize_url_py_cybersecurity\n")
    proc = _run_vendor(enr)
    assert proc.returncode == 0, proc.stderr
    assert (enr / "_vendored" / "__init__.py").exists()
    assert (enr / "_vendored" / "cybersecurity" / "__init__.py").exists()
    assert (enr / "_vendored" / "cybersecurity" / "cybersecurity.py").exists()
    assert (enr / ".vendor.lock").exists()
    # Decode explicitly: the lock records source paths, and the locale
    # default encoding is not guaranteed to be UTF-8 on every host.
    lock = (enr / ".vendor.lock").read_text(encoding="utf-8")
    assert "normalize_url_py_cybersecurity" in lock
|
|
|
|
|
|
def test_transitive_siblings_are_copied(tmp_path):
    """Vendoring ``extract_iocs`` must also copy the sibling modules it needs.

    Every module named in ``expected_siblings`` has to be present next to
    ``extract_iocs.py`` inside ``_vendored/cybersecurity``.
    """
    manifest = (
        "id: x\nname: x\napplies_to: [Webpage]\n"
        "uses_functions:\n"
        " - extract_iocs_py_cybersecurity\n"
    )
    enricher_dir = _make_enricher_dir(tmp_path, manifest)

    result = _run_vendor(enricher_dir)
    assert result.returncode == 0, result.stderr

    cyb = enricher_dir / "_vendored" / "cybersecurity"
    assert (cyb / "extract_iocs.py").exists()

    expected_siblings = {
        "extract_ip_addresses.py",
        "extract_emails.py",
        "extract_domains.py",
        "extract_file_hashes.py",
        "extract_crypto_wallets.py",
        "extract_cve_ids.py",
        "extract_mac_addresses.py",
        "extract_phone_numbers.py",
    }
    # Collect the names of all vendored .py files, then diff against the
    # expected set so the failure message lists exactly what is absent.
    found = set()
    for module_path in cyb.glob("*.py"):
        found.add(module_path.name)
    missing = expected_siblings.difference(found)
    assert not missing, f"siblings no copiados: {missing}"
|
|
|
|
|
|
def test_lock_contains_correct_sha256(tmp_path):
    """The lock file must record the SHA256 of the vendored source module.

    The expected digest is computed from the registry copy of
    ``cybersecurity.py`` and must appear verbatim in ``.vendor.lock``.
    """
    enr = _make_enricher_dir(tmp_path,
                             "id: x\nname: x\napplies_to: [Url]\n"
                             "uses_functions:\n - normalize_url_py_cybersecurity\n")
    proc = _run_vendor(enr)
    assert proc.returncode == 0, proc.stderr

    src = REGISTRY_ROOT / "python" / "functions" / "cybersecurity" / "cybersecurity.py"
    expected_sha = hashlib.sha256(src.read_bytes()).hexdigest()

    # Decode explicitly: the lock records source paths, and the locale
    # default encoding is not guaranteed to be UTF-8 on every host.
    lock = (enr / ".vendor.lock").read_text(encoding="utf-8")
    assert expected_sha in lock, lock
|
|
|
|
|
|
def test_idempotency_skips_when_unchanged(tmp_path):
    """A second run over an unchanged enricher must report that nothing changed.

    The script signals this by printing ``sin cambios`` on stdout.
    """
    enr = _make_enricher_dir(tmp_path,
                             "id: x\nname: x\napplies_to: [Url]\n"
                             "uses_functions:\n - normalize_url_py_cybersecurity\n")
    p1 = _run_vendor(enr)
    # Surface stderr on failure, as the other tests in this module do.
    assert p1.returncode == 0, p1.stderr
    p2 = _run_vendor(enr)
    assert p2.returncode == 0, p2.stderr
    assert "sin cambios" in p2.stdout, p2.stdout
|
|
|
|
|
|
def test_vendored_module_can_be_imported_in_isolation(tmp_path):
    """Smoke test: the resulting ``_vendored/`` imports without the registry root.

    A separate interpreter is started with ``_vendored`` as the only extra
    ``sys.path`` entry; it imports ``extract_urls`` and must find exactly
    one URL in a sample string.
    """
    # Local import: only this test needs the running interpreter's path.
    import sys

    enr = _make_enricher_dir(tmp_path,
                             "id: x\nname: x\napplies_to: [Webpage]\n"
                             "uses_functions:\n - extract_urls_py_cybersecurity\n")
    proc = _run_vendor(enr)
    assert proc.returncode == 0, proc.stderr

    # The vendored directory is passed as argv[1] instead of being reached
    # through a temporary symlink: no symlink privileges are needed and no
    # path has to be quoted inside the code string.
    code = (
        "import sys; sys.path.insert(0, sys.argv[1]);"
        "from cybersecurity.cybersecurity import extract_urls;"
        "print(len(extract_urls('foo http://x.com bar')))"
    )
    # sys.executable is the interpreter running pytest; a bare "python3"
    # may be missing from PATH or resolve to a different installation.
    proc2 = subprocess.run(
        [sys.executable, "-c", code, str(enr / "_vendored")],
        cwd=str(tmp_path), capture_output=True, text=True, timeout=10,
    )
    assert proc2.returncode == 0, proc2.stderr
    assert proc2.stdout.strip() == "1"
|