feat: catch-up de decisiones previas (Webpage→Url, anti-bot, UI 2-col, tests cross-platform)

Bloque de cambios revisados y validados con el usuario en sesiones previas que no habian aterrizado en commits propios. Lista por tema: * enrichers: web_search ahora usa lite.duckduckgo.com como endpoint primario (mas tolerante con bot detection desde IP residencial), con fallback al endpoint html. Detecta pagina captcha y emite error claro si ambos fallan. Anyade _DDGLiteParser para el formato lite + auto-pick de parser por contenido. * enrichers: tipo Webpage unificado en Url (campos de cuerpo cacheado viven en metadata del Url). Manifests actualizados (applies_to: [Url]). fetch_webpage ya no convierte Url->Webpage. * enrichers/manifest: campo `params` parseado a EnricherSpec.params (name, type, default_value, description). UI puede renderizar dialog de configuracion. * jobs: fix de path conversion para Python embebido nativo Windows (no convertir a /mnt/c/... cuando el subproceso es Windows-native; solo cuando es bash o python via WSL). * main.cpp: ventana ImGui (no modal) "Run enricher" con layout 2-col (label izq, input der). Inserta job con JSON tipado. Layout clustering apretado: hijos del mismo anchor en un solo anillo alrededor del padre, sin desperdigar por anillos crecientes. * views: inspector con layout 2-col via BeginTable (Identity, Schema fields, Extras). Description full-width debajo de su label. * tests: portable conftest (auto-detecta REGISTRY_ROOT, PYTHON_BIN, ENRICHERS_DIR para WSL y Windows portable). _runner.py trampoline inyecta stub via sys.path porque embedded Python ignora PYTHONPATH. Tests bash-only (vendor_script, freeze, dispatcher bash, resolver Linux-binary) skipean en Windows. Tests existentes adaptados a Webpage->Url. Resultado actual: 32 passed WSL, 21 passed + 11 skipped Windows.
2026-05-03 14:41:28 +02:00
parent 4be5734ce5
commit 7a94160fd2
26 changed files with 973 additions and 241 deletions
@@ -59,6 +59,57 @@ std::vector<std::string> parse_inline_list(const std::string& v) {
    return out;
 }
 // Splits `s` on commas that sit at nesting level zero, i.e. outside quotes
 // and outside any `[...]` / `{...}` group. Inline YAML such as
 // `{ name: limit, type: int, default: 10 }` may carry quoted strings that
 // contain commas, so a naive split would break them apart.
 //
 // Rules:
 //   - Single and double quotes both open a quoted run that ends at the next
 //     matching quote character.
 //   - Inside double quotes a backslash escapes the following character, so
 //     `\"` does not terminate the string. Single-quoted scalars have no
 //     backslash escapes and are left untouched.
 //   - Unbalanced closing brackets/braces are ignored for depth tracking so a
 //     stray `]` or `}` cannot disable splitting for the rest of the input.
 //
 // Pieces are returned verbatim (no trimming, quotes kept). A trailing empty
 // piece is dropped; empty pieces between consecutive commas are kept.
 std::vector<std::string> split_top_level(const std::string& s) {
    std::vector<std::string> out;
    std::string cur;
    int depth_b = 0, depth_c = 0;
    char quote = 0;        // active quote character, 0 when outside quotes
    bool escaped = false;  // previous char was a backslash inside "..."
    for (char c : s) {
        if (quote) {
            cur.push_back(c);
            if (escaped) {
                // This character was escaped; it can never close the quote.
                escaped = false;
            } else if (c == '\\' && quote == '"') {
                escaped = true;
            } else if (c == quote) {
                quote = 0;
            }
            continue;
        }
        if (c == '"' || c == '\'') { quote = c; cur.push_back(c); continue; }
        switch (c) {
            case '[': ++depth_b; break;
            case ']': if (depth_b > 0) --depth_b; break;  // never go negative
            case '{': ++depth_c; break;
            case '}': if (depth_c > 0) --depth_c; break;  // never go negative
            default: break;
        }
        if (c == ',' && depth_b == 0 && depth_c == 0) {
            out.push_back(cur);
            cur.clear();
            continue;
        }
        cur.push_back(c);
    }
    if (!cur.empty()) out.push_back(cur);
    return out;
 }
 // Parses an inline YAML mapping such as `{ name: x, type: int, default: 10 }`
 // into `*out`. The input must be wrapped in `{` ... `}` after trimming;
 // otherwise nothing is written and false is returned.
 //
 // Recognised keys: `name`, `type` (lower-cased), `default`, and
 // `description` / `desc`. Unknown keys and entries without a colon are
 // skipped. When no type was provided it falls back to "string".
 // Returns true only if a non-empty `name` ended up in `*out`.
 bool parse_inline_param(const std::string& v, EnricherParam* out) {
    const std::string trimmed = strip(v);
    const bool braced = trimmed.size() >= 2 &&
                        trimmed.front() == '{' && trimmed.back() == '}';
    if (!braced) return false;

    // Drop the surrounding braces and split the remainder into `key: value`
    // entries at top-level commas.
    const std::vector<std::string> entries =
        split_top_level(trimmed.substr(1, trimmed.size() - 2));
    for (const std::string& entry : entries) {
        const size_t sep = entry.find(':');
        if (sep == std::string::npos) continue;
        const std::string key   = strip(entry.substr(0, sep));
        const std::string value = strip_quotes(strip(entry.substr(sep + 1)));
        if (key == "name") {
            out->name = value;
        } else if (key == "type") {
            out->type = lower(value);
        } else if (key == "default") {
            out->default_value = value;
        } else if (key == "description" || key == "desc") {
            out->description = value;
        }
    }

    if (out->type.empty()) out->type = "string";
    return !out->name.empty();
 }
 // Manifest YAML soportado (subset):
 //   id: fetch_webpage
 //   name: "Fetch web page"
@@ -66,16 +117,19 @@ std::vector<std::string> parse_inline_list(const std::string& v) {
 //   applies_to: [Webpage, Url]
 //   lang: python                     <- issue 0033: go|python|bash (default python)
 //   exec: run                        <- basename del binario/script (default "run")
-//   params:                          <- v1 ignora bloque
+//   params:
-//     - { name: timeout_s, ... }
+//     - { name: timeout_s, type: int, default: 15 }
 //     - { name: region, type: string, default: "" }
 //
-// Las claves anidadas bajo `params:` (y otros bloques con valor vacio
+// Solo el bloque `params:` se parsea con detalle. Otros bloques con valor
-// seguido de lineas indentadas) se ignoran.
+// vacio seguido de lineas indentadas (`emits:`, `relations:`,
 // `uses_functions:`) se ignoran como antes.
 bool parse_manifest(const std::string& path, EnricherSpec* out) {
    std::ifstream f(path);
    if (!f) return false;
    std::string line;
-    bool in_skip_block = false;
+    bool in_skip_block   = false;
    bool in_params_block = false;
    while (std::getline(f, line)) {
        // Strip CR de Windows.
        if (!line.empty() && line.back() == '\r') line.pop_back();
@@ -84,10 +138,27 @@ bool parse_manifest(const std::string& path, EnricherSpec* out) {
        std::string trim = strip(line);
        if (trim.empty() || trim.front() == '#') continue;
-        // Si la linea NO empieza con whitespace, salimos del bloque skip.
+        // Si la linea NO empieza con whitespace, salimos de los bloques
        // anidados — el siguiente top-level reinicia el contexto.
        bool indented = !line.empty() && std::isspace((unsigned char)line.front());
-        if (!indented) in_skip_block = false;
+        if (!indented) {
            in_skip_block   = false;
            in_params_block = false;
        }
        if (in_skip_block) continue;
        if (in_params_block) {
            // Linea esperada: `  - { name: x, type: int, default: 10 }`.
            // Tolera variaciones de indent y comilla.
            std::string body = trim;
            if (!body.empty() && body.front() == '-') {
                body = strip(body.substr(1));
            }
            EnricherParam p;
            if (parse_inline_param(body, &p)) {
                out->params.push_back(std::move(p));
            }
            continue;
        }
        size_t colon = trim.find(':');
        if (colon == std::string::npos) continue;
@@ -101,9 +172,17 @@ bool parse_manifest(const std::string& path, EnricherSpec* out) {
        else if (key == "applies_to")  out->applies_to = parse_inline_list(val);
        else if (key == "lang")        out->lang = lower(strip_quotes(val));
        else if (key == "exec")        out->exec_basename = strip_quotes(val);
-        else if (key == "params" && val.empty())   in_skip_block = true;
+        else if (key == "params") {
            // `params: []` — vacio explicito, nada que hacer.
            // `params:`    — siguiente bloque indentado son items.
            std::string vs = strip(val);
            if (vs.empty()) in_params_block = true;
            // Si fuese inline (`params: [{...}]`) — formato no usado en
            // los manifests actuales, lo ignoramos.
        }
        else if (key == "emits" && val.empty())    in_skip_block = true;
        else if (key == "relations" && val.empty()) in_skip_block = true;
        else if (key == "uses_functions" && val.empty()) in_skip_block = true;
    }
    // Defaults — preservan retrocompat con manifests existentes que no
@@ -10,11 +10,22 @@
 // `enrichers_for_type(type_ref)` para mostrar el submenu filtrado por tipo
 // del nodo right-clickado.
 //
-// Para v1 no parseamos `params` con detalle — solo lo necesario para
+// Los parametros declarados en `params:` del manifest se parsean para que
-// presentar el item de menu y submitear el job con `{}`.
+// la UI pueda renderizar un dialog de configuracion antes de lanzar el
 // job. Si la lista esta vacia, el job se submitea directamente con `{}`.
 namespace ge {
 // A parameter declared in `manifest.yaml` as an entry of the form
 // `{ name, type, default }`. The configuration UI edits one string buffer
 // per parameter and serializes it to JSON according to `type` when the user
 // presses Run.
 struct EnricherParam {
    std::string name;           // e.g. "limit"
    std::string type;           // "int" | "float" | "string" | "bool"
    std::string default_value;  // default value, kept in textual form
    std::string description;    // optional, shown as a tooltip
 };
 struct EnricherSpec {
    std::string id;            // ej: "fetch_webpage"
    std::string name;          // ej: "Fetch web page"
@@ -34,6 +45,9 @@ struct EnricherSpec {
    // <dir>/<exec_basename>{.exe} segun la plataforma. Default "run".
    std::string exec_basename;
    // Parametros editables por el usuario antes de lanzar el job.
    std::vector<EnricherParam> params;
    // True si lang != "" y no se pudo resolver el ejecutable
    // correspondiente (ej: enricher Go sin compilar). El loader deja
    // el spec en el registro pero marcado como deshabilitado para
@@ -1,7 +1,7 @@
 id: extract_domain
 name: "Extract domain"
 description: "Saca el dominio de la url/email del nodo y crea/conecta una entidad Domain con relacion BELONGS_TO. No descarga nada."
-applies_to: [Url, Webpage, Email]
+applies_to: [Url, Email]
 emits: [Domain]
 relations: [BELONGS_TO]
 params: []
@@ -1,7 +1,7 @@
 id: extract_links
 name: "Extract links"
-description: "Lee la markdown cacheada de un Webpage (metadata.markdown_path) y crea nodos Url para cada enlace encontrado, conectados con relacion LINKS_TO. Requiere haber ejecutado fetch_webpage antes."
+description: "Lee la markdown cacheada del nodo Url (metadata.markdown_path) y crea nodos Url para cada enlace encontrado, conectados con relacion LINKS_TO. Requiere haber ejecutado fetch_webpage antes."
-applies_to: [Webpage]
+applies_to: [Url]
 emits: [Url]
 relations: [LINKS_TO]
 uses_functions:
@@ -1,7 +1,7 @@
 id: extract_text_entities
 name: "Extract entities from text"
-description: "Lee la markdown cacheada de un Webpage y extrae IoCs (IPs, emails, dominios, hashes, crypto wallets, CVEs, MAC, telefonos) creando entidades + relacion EXTRACTED_FROM. Sin coste — solo regex. Modelos ML (GLiNER/GLiREL) en futura iteracion."
+description: "Lee la markdown cacheada de un Url y extrae IoCs (IPs, emails, dominios, hashes, crypto wallets, CVEs, MAC, telefonos) creando entidades + relacion EXTRACTED_FROM. Sin coste — solo regex. Modelos ML (GLiNER/GLiREL) en futura iteracion."
-applies_to: [Webpage]
+applies_to: [Url]
 emits: [Email, IPAddress, Domain, FileHash, CryptoWallet, CVE, MACAddress, Phone]
 relations: [EXTRACTED_FROM]
 uses_functions:
@@ -1,7 +1,7 @@
 id: fetch_webpage
 name: "Fetch web page"
-description: "Descarga HTML de una URL, extrae markdown limpio (readabilipy) y guarda los blobs en cache. Crea/actualiza el nodo Webpage con title/status_code/paths y crea el Domain con relacion BELONGS_TO."
+description: "Descarga HTML de una URL, extrae markdown limpio (readabilipy) y guarda los blobs en cache. Actualiza el nodo Url con title/status_code/paths/markdown en metadata y crea el Domain con relacion BELONGS_TO."
-applies_to: [Url, Webpage]
+applies_to: [Url]
 emits: [Domain]
 relations: [BELONGS_TO]
 uses_functions:
@@ -3,7 +3,12 @@
 Lee JSON de stdin, descarga la URL del nodo, convierte HTML a markdown,
 guarda blobs en `<cache_dir>/<sha256[0:2]>/<sha256>.{html,md}`, actualiza el
-nodo a tipo Webpage con metadata enriquecida y crea/conecta el Domain.
+nodo (deja type_ref=Url) con metadata enriquecida y crea/conecta el Domain.
 Nota: historicamente fetch_webpage convertia Url -> Webpage, pero esos
 dos tipos se han unificado en Url. Los campos de cuerpo cacheado
 (html_path, markdown_path, status_code, fetched_at, text_length, ...)
 viven en metadata.
 Wire protocol (issue 0026):
  - stdin:  JSON con node_id, metadata, ops_db_path, app_dir, cache_dir,
@@ -289,7 +294,14 @@ def main() -> int:
            log(f"node {node_id} disappeared")
            return 6
        cur_type, cur_meta = row[0], row[1] or "{}"
-        new_type = "Webpage" if cur_type.lower() == "url" else cur_type or "Webpage"
+        # Webpage fue un tipo separado historicamente. Hoy se unifica en
        # Url (mismo tipo, los campos de cuerpo cacheado viven en
        # metadata): si el nodo entrante es Url o el legacy Webpage, lo
        # dejamos como Url; si el nodo no tiene tipo, default Url.
        if not cur_type or cur_type.lower() in ("url", "webpage"):
            new_type = "Url"
        else:
            new_type = cur_type
        patch = {
            "url":           url,
@@ -8,14 +8,20 @@ Wire protocol estandar (issue 0026):
  - stdout: una linea JSON al final con resumen.
  - exit code 0 = ok, !=0 = error.
-DDG endpoint usado: https://html.duckduckgo.com/html/?q=<query>
+DDG endpoints usados:
-Devuelve HTML estatico, sin JavaScript. Los enlaces vienen envueltos en
+  1. https://lite.duckduckgo.com/lite/ (POST) — endpoint primario.
-redireccion `//duckduckgo.com/l/?uddg=<encoded>` que hay que decodificar.
+     HTML minimo (ano 2009-style), tabla con `<a class='result-link'>` y
     `<td class='result-snippet'>`. Es el menos agresivo con bot
     detection; suele responder 200 cuando el endpoint `html.` ya
     devuelve un challenge "anomaly" desde IPs residenciales/Windows.
  2. https://html.duckduckgo.com/html/ (POST) — fallback. Su parser
     usa `result__a` / `result__snippet`. DDG envuelve los enlaces en
     `//duckduckgo.com/l/?uddg=<encoded>` que hay que decodificar.
-Para automatizar busquedas masivas en el futuro (sesion persistente,
+Si ambos endpoints devuelven la pagina anti-bot ("anomaly", challenge
-cookies, JS, captchas) la fase 2 introducira un enricher `web_search_cdp`
+captcha), el enricher emite un error claro indicando que se necesita
-que controle un Chromium remoto via DevTools Protocol. Este es el
+`web_search_cdp` (issue 0029) — el fallback simple zero-infra no puede
-fallback simple zero-infra.
+resolver el challenge.
 """
 from __future__ import annotations
@@ -49,13 +55,33 @@ def now_ms() -> int:
    return int(time.time() * 1000)
-def fetch_ddg(query: str, timeout: int, region: str, safe: str) -> str:
+def _ddg_post(url: str, params: dict, headers: dict, timeout: int) -> str:
-    """Descarga la pagina HTML de resultados de DuckDuckGo.
+    try:
        import requests  # type: ignore
        r = requests.post(url, data=params, headers=headers, timeout=timeout)
        return r.text
    except ImportError:
        from urllib.parse import urlencode
        from urllib.request import Request, urlopen
        body = urlencode(params).encode()
        req = Request(url, data=body, headers=headers)
        with urlopen(req, timeout=timeout) as resp:  # type: ignore
            return resp.read().decode("utf-8", errors="replace")
-    El endpoint `html.duckduckgo.com` no requiere JS y respeta los
+
-    parametros `kl` (region) y `kp` (safe search: 1 strict, -1 off,
+def is_anomaly_page(htmltxt: str) -> bool:
-    -2 moderate). Inyecta cookie para que el "moderate" se aplique sin
+    """Detecta la pagina anti-bot de DDG (challenge captcha)."""
-    pantalla intermedia.
+    s = htmltxt.lower()
    return "anomaly" in s and "challenge" in s
 def fetch_ddg(query: str, timeout: int, region: str, safe: str) -> tuple[str, str]:
    """Descarga la pagina de resultados de DuckDuckGo.
    Intenta primero `lite.duckduckgo.com/lite/` (HTML minimo, ano-2009
    style, mucho menos agresivo con bot detection que `html.`). Si
    ese endpoint devuelve la pagina anti-bot, cae al endpoint `html.`.
    Devuelve `(html, source)` donde source ∈ {"lite", "html"}.
    """
    params = {"q": query}
    if region:
@@ -66,29 +92,22 @@ def fetch_ddg(query: str, timeout: int, region: str, safe: str) -> str:
    headers = {
        "User-Agent": (
-            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/120 Safari/537.36"
        ),
        "Accept": "text/html,application/xhtml+xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.7",
    }
-    try:
+
-        import requests  # type: ignore
+    htmltxt = _ddg_post("https://lite.duckduckgo.com/lite/", params,
-        r = requests.post(
+                         headers, timeout)
-            "https://html.duckduckgo.com/html/",
+    if not is_anomaly_page(htmltxt):
-            data=params,
+        return htmltxt, "lite"
-            headers=headers,
+
-            timeout=timeout,
+    log("lite endpoint devolvio challenge — fallback a html endpoint")
-        )
+    htmltxt = _ddg_post("https://html.duckduckgo.com/html/", params,
-        return r.text
+                         headers, timeout)
-    except ImportError:
+    return htmltxt, "html"
        from urllib.parse import urlencode
        from urllib.request import Request, urlopen
        body = urlencode(params).encode()
        req = Request("https://html.duckduckgo.com/html/", data=body,
                      headers=headers)
        with urlopen(req, timeout=timeout) as resp:  # type: ignore
            return resp.read().decode("utf-8", errors="replace")
 def decode_ddg_href(href: str) -> str:
@@ -195,7 +214,7 @@ class _DDGParser(HTMLParser):
 def parse_ddg_html(htmltxt: str) -> list[dict]:
-    """Parsea el HTML de DDG y devuelve [{url, title, snippet, rank}]."""
+    """Parsea el HTML del endpoint `html.duckduckgo.com`."""
    p = _DDGParser()
    try:
        p.feed(htmltxt)
@@ -221,6 +240,100 @@ def parse_ddg_html(htmltxt: str) -> list[dict]:
    return out
 class _DDGLiteParser(HTMLParser):
    """Parser para `lite.duckduckgo.com/lite/`.
    Estructura tipica:
      <a rel="nofollow" href="<URL>" class='result-link'>title</a>
      ...
      <td class='result-snippet'>snippet text</td>
    Los snippets vienen DESPUES del enlace (no hijo del mismo elemento),
    asi que parea por orden: cada `result-link` consume el siguiente
    `result-snippet`.
    """
    def __init__(self) -> None:
        super().__init__(convert_charrefs=True)
        self.results: list[dict] = []
        self._in_link = False
        self._in_snippet = False
        self._cur_href = ""
        self._title_buf: list[str] = []
        self._snippet_buf: list[str] = []
        self._pending_snippet_for: int | None = None
    def _attrs_dict(self, attrs):
        return {k: (v or "") for k, v in attrs}
    def handle_starttag(self, tag: str, attrs):
        a = self._attrs_dict(attrs)
        cls = a.get("class", "")
        if tag == "a" and "result-link" in cls:
            href = a.get("href", "")
            self._in_link = True
            self._cur_href = href
            self._title_buf = []
        elif tag == "td" and "result-snippet" in cls:
            self._in_snippet = True
            self._snippet_buf = []
    def handle_endtag(self, tag: str):
        if self._in_link and tag == "a":
            title = " ".join("".join(self._title_buf).split())
            self.results.append({
                "href":    self._cur_href,
                "title":   title,
                "snippet": "",
            })
            self._pending_snippet_for = len(self.results) - 1
            self._in_link = False
        elif self._in_snippet and tag == "td":
            snippet = " ".join("".join(self._snippet_buf).split())
            if self._pending_snippet_for is not None:
                self.results[self._pending_snippet_for]["snippet"] = snippet
                self._pending_snippet_for = None
            self._in_snippet = False
    def handle_data(self, data: str):
        if self._in_link:
            self._title_buf.append(data)
        elif self._in_snippet:
            self._snippet_buf.append(data)
 def parse_ddg_lite(htmltxt: str) -> list[dict]:
    """Parse the HTML returned by the `lite.duckduckgo.com/lite/` endpoint.

    Returns a list of `{url, title, snippet, rank}` dicts. Entries are kept
    in page order, de-duplicated by URL, and ranked starting at 1. Parser
    failures are logged and whatever was collected so far is still used.
    """
    parser = _DDGLiteParser()
    try:
        parser.feed(htmltxt)
        parser.close()
    except Exception as e:
        log(f"DDG lite parser failed: {e}")

    ranked: list[dict] = []
    seen_urls: set[str] = set()
    for raw in parser.results:
        # The lite endpoint sends direct absolute URLs; decode_ddg_href is
        # still applied in case DDG wraps a link in a redirect.
        url = decode_ddg_href(raw.get("href") or "")
        if (
            not url
            or not url.startswith(("http://", "https://"))
            # Skip DDG self-promotion (help pages).
            or "duckduckgo.com/duckduckgo-help-pages/" in url
            or url in seen_urls
        ):
            continue
        seen_urls.add(url)
        ranked.append({
            "url":     url,
            "title":   raw.get("title") or "",
            "snippet": raw.get("snippet") or "",
            "rank":    len(ranked) + 1,
        })
    return ranked
 def find_url_entity(conn: sqlite3.Connection, url: str) -> str | None:
    """Busca un nodo Url existente con la misma url en metadata."""
    cur = conn.execute(
@@ -384,18 +497,40 @@ def main() -> int:
    progress(0.10, "fetching")
    try:
-        htmltxt = fetch_ddg(query, timeout=timeout_s, region=region, safe=safe)
+        htmltxt, source = fetch_ddg(query, timeout=timeout_s,
                                     region=region, safe=safe)
    except Exception as e:
        log(f"DDG fetch failed: {e}")
        print(json.dumps({"error": str(e), "query": query,
                          "entities_added": 0, "relations_added": 0}))
        return 4
    if is_anomaly_page(htmltxt):
        log("DDG devolvio challenge captcha en ambos endpoints — "
            "usar web_search_cdp (issue 0029) para resolver")
        print(json.dumps({
            "error":            "DDG bot challenge — captcha required",
            "query":            query,
            "engine":           "duckduckgo",
            "source":           source,
            "results":          0,
            "entities_added":   0,
            "relations_added":  0,
        }, ensure_ascii=False))
        return 4
    progress(0.55, "parsing")
-    results = parse_ddg_html(htmltxt)
+    # El parser se elige por contenido — si el endpoint y el markup no
    # coinciden (tests con stub que sirve cualquier URL, o un cambio
    # futuro de DDG), aun extraemos resultados. Probamos ambos y nos
    # quedamos con el que devuelva mas.
    results_lite = parse_ddg_lite(htmltxt) if "result-link" in htmltxt else []
    results_html = parse_ddg_html(htmltxt) if "result__a"   in htmltxt else []
    results = results_lite if len(results_lite) >= len(results_html) else results_html
    if limit > 0:
        results = results[:limit]
-    log(f"DDG returned {len(results)} results")
+    log(f"DDG ({source}) returned {len(results)} results "
        f"(lite_parsed={len(results_lite)} html_parsed={len(results_html)})")
    progress(0.80, "applying")
    conn = sqlite3.connect(ops_db_path)
@@ -101,25 +101,18 @@ entities:
      - { name: country,  type: string }
      - { name: postcode, type: string }
  # Url — unifica el viejo Url (solo metadata) y Webpage (cuerpo
  # cacheado). Tras fetch_webpage, los campos `*_path`, `status_code`,
  # `fetched_at`, `text_length`, etc. tienen valor; sin haber corrido
  # fetch siguen vacios pero el nodo sigue siendo un Url valido.
  - name: Url
    color: "#89E0FC"
    icon: ti-link
    principal_field: url
    fields:
      - { name: url,    type: url, required: true }
      - { name: title,  type: string }
      - { name: domain, type: string }
  # Documento web descargado. Issue 0027: tipo separado de Url para nodos
  # con cuerpo cacheado (HTML+markdown+screenshot). Los enrichers
  # fetch_webpage / extract_links / extract_text_entities lo pueblan.
  - name: Webpage
    color: "#89E0FC"
    icon: ti-file-text
    principal_field: url
    fields:
      - { name: url,             type: url, required: true }
      - { name: title,           type: string }
      - { name: domain,          type: string }
      - { name: status_code,     type: int }
      - { name: content_type,    type: string }
      - { name: fetched_at,      type: date }
@@ -378,15 +378,20 @@ std::string read_entity_field(const char* db_path, const char* id,
    return out;
 }
-// JSON entregado al subprocess. Todos los paths se normalizan a WSL en
+// JSON entregado al subprocess. En Windows, los paths se normalizan a
-// Windows; en POSIX los respeta tal cual.
+// forma WSL solo cuando el subprocess corre dentro de WSL (lang=bash, o
 // python con runtime registry_venv). Para subprocesses nativos Windows
 // (lang=go, o python embedded/FN_PYTHON/system) se mantienen los paths
 // Windows-nativos — pasarlos como /mnt/c/... haria que fallen al abrir.
 // En POSIX la conversion es no-op y siempre se respetan los paths.
 std::string build_stdin_json(const std::string& job_id,
                             const std::string& enricher_id,
                             const std::string& node_id,
                             const std::string& params_json,
                             const std::string& ops_db,
                             const std::string& app_dir,
-                             const std::string& registry_root)
+                             const std::string& registry_root,
                             const std::string& lang)
 {
    std::string node_type, node_name, node_metadata = "{}";
    if (!node_id.empty()) {
@@ -420,10 +425,25 @@ std::string build_stdin_json(const std::string& job_id,
    std::string app_dir_abs  = absify(app_dir);
    std::string root_abs     = absify(registry_root);
-    std::string ops_db_wsl   = to_wsl_path(ops_db_abs);
+    // Decidir si convertir paths a forma WSL. Solo se hace cuando el
-    std::string app_dir_wsl  = to_wsl_path(app_dir_abs);
+    // subprocess vive dentro de WSL — si no, los paths /mnt/c/... no
-    std::string root_wsl     = to_wsl_path(root_abs);
+    // existen para el proceso Windows-nativo.
-    std::string cache_dir    = app_dir_wsl + "/cache";
+    bool use_wsl_paths = false;
 #ifdef _WIN32
    if (lang == "bash") {
        use_wsl_paths = true;
    } else if (lang == "python") {
        use_wsl_paths = cached_python_runtime().needs_wsl;
    }
    // lang == "go": siempre nativo Windows.
 #else
    (void)lang;
 #endif
    std::string ops_db_out  = use_wsl_paths ? to_wsl_path(ops_db_abs)  : ops_db_abs;
    std::string app_dir_out = use_wsl_paths ? to_wsl_path(app_dir_abs) : app_dir_abs;
    std::string root_out    = use_wsl_paths ? to_wsl_path(root_abs)    : root_abs;
    std::string cache_dir   = app_dir_out + "/cache";
    std::ostringstream o;
    o << '{'
@@ -434,10 +454,10 @@ std::string build_stdin_json(const std::string& job_id,
      << "\"node_name\":\""   << json_escape(node_name)     << "\","
      << "\"metadata\":"      << (node_metadata.empty() ? "{}" : node_metadata) << ","
      << "\"params\":"        << (params_json.empty() ? "{}" : params_json) << ","
-      << "\"ops_db_path\":\"" << json_escape(ops_db_wsl)    << "\","
+      << "\"ops_db_path\":\"" << json_escape(ops_db_out)    << "\","
-      << "\"app_dir\":\""     << json_escape(app_dir_wsl)   << "\","
+      << "\"app_dir\":\""     << json_escape(app_dir_out)   << "\","
      << "\"cache_dir\":\""   << json_escape(cache_dir)     << "\","
-      << "\"registry_root\":\"" << json_escape(root_wsl)    << "\""
+      << "\"registry_root\":\"" << json_escape(root_out)    << "\""
      << '}';
    return o.str();
 }
@@ -1030,7 +1050,7 @@ void worker_loop() {
        }
        std::string stdin_payload = build_stdin_json(
            ctx.id, ctx.enricher_id, ctx.node_id, ctx.params_json,
-            ops_db, g_state->app_dir, g_state->registry_root);
+            ops_db, g_state->app_dir, g_state->registry_root, lang);
        ProcResult res = run_subprocess(job_id, run_path, lang,
                                         stdin_payload, ctrl);
@@ -43,6 +43,8 @@
 #include <cmath>
 #include <string>
 #include <sys/stat.h>
 #include <algorithm>
 #include <unordered_map>
 #include <vector>
 #ifndef _WIN32
@@ -318,27 +320,91 @@ static void place_orphans_near_neighbors(GraphData& g, float min_dist,
    int   park_n = 0;
    int placed_neighbor = 0, placed_camera = 0, parked = 0;
    for (int i = 0; i < g.node_count; ++i) {
        GraphNode& n = g.nodes[i];
        if (n.x != 0.0f || n.y != 0.0f) continue;
    // ----- Pase 1: agrupar orphans por su anchor (vecino con posicion) -----
    // Cuando un enricher crea N nodos todos conectados al mismo source
    // (caso tipico: web_search → N Urls SEARCH_RESULT_OF source), queremos
    // que los N nodos clustereen MUY apretados alrededor del source en
    // un solo anillo, no que se desperdiguen por anillos concentricos
    // hasta encontrar slot libre. La busqueda anti-colision individual
    // los empuja hacia fuera cuando ya hay vecinos preexistentes; aqui
    // les damos a los hermanos del mismo anchor angulos repartidos en
    // un anillo unico cerca del padre.
    std::unordered_map<int, std::vector<int>> orphans_by_anchor;
    std::vector<int>                          orphans_no_anchor;
    for (int i = 0; i < g.node_count; ++i) {
        const GraphNode& n = g.nodes[i];
        if (n.x != 0.0f || n.y != 0.0f) continue;
        int parent = layout_first_placed_neighbor(g, i);
-        if (parent >= 0) {
+        if (parent >= 0) orphans_by_anchor[parent].push_back(i);
-            float ox, oy;
+        else             orphans_no_anchor.push_back(i);
-            if (find_collision_free_slot(
+    }
-                    g, i, g.nodes[parent].x, g.nodes[parent].y,
+
-                    min_dist, n.user_data,
+    // ----- Pase 2: place clusters (orphans con anchor) -----
-                    neighbor_radii, n_neighbor_radii, &ox, &oy)) {
+    // Para cada anchor con sus hijos, los repartimos en un anillo
-                n.x = ox; n.y = oy;
+    // alrededor del padre. Si hay mas hijos de los que caben en el
-            } else {
+    // anillo base, abrimos anillos adicionales. Cada hijo sigue
-                // Acepta solape como ultimo recurso.
+    // pasando find_collision_free_slot como fallback si el slot ideal
-                n.x = g.nodes[parent].x + neighbor_radii[n_neighbor_radii - 1];
+    // estaba ocupado por otro nodo del grafo.
-                n.y = g.nodes[parent].y;
+    const float two_pi = 6.28318530718f;
    for (auto& kv : orphans_by_anchor) {
        int parent = kv.first;
        std::vector<int>& kids = kv.second;
        if (kids.empty()) continue;
        // Orden estable por user_data para que rondas sucesivas del
        // mismo enricher (mismo set de hijos) coloquen igual.
        std::sort(kids.begin(), kids.end(),
                  [&](int a, int b) {
                      return g.nodes[a].user_data < g.nodes[b].user_data;
                  });
        float cx = g.nodes[parent].x;
        float cy = g.nodes[parent].y;
        // Capacidad por anillo: circunferencia / min_dist.
        // Para min_dist=60, ring r=80 -> ~8 slots; r=140 -> ~14.
        for (size_t k = 0; k < kids.size(); ++k) {
            // Anillo y slot dentro del anillo en funcion del indice.
            int ri = 0; size_t accum = 0; size_t cap = 0;
            for (; ri < n_neighbor_radii; ++ri) {
                float r_here = neighbor_radii[ri];
                cap = (size_t)std::max(6.0f, two_pi * r_here / min_dist);
                if (k < accum + cap) break;
                accum += cap;
            }
-            n.vx = n.vy = 0.0f;
+            if (ri >= n_neighbor_radii) ri = n_neighbor_radii - 1;
            float r_use = neighbor_radii[ri];
            cap = (size_t)std::max(6.0f, two_pi * r_use / min_dist);
            size_t slot = k - accum;
            // Jitter pequeno por user_data para que rondas distintas no
            // queden alineadas si comparten anchor.
            uint64_t seed = g.nodes[kids[k]].user_data;
            float jitter = ((float)((seed >> 16) & 0xFF) / 255.0f) * (two_pi / cap);
            float angle  = jitter + (float)slot * (two_pi / cap);
            float px = cx + r_use * std::cos(angle);
            float py = cy + r_use * std::sin(angle);
            // Si el slot ideal colisiona con un nodo ajeno al cluster,
            // delegamos en find_collision_free_slot que probara mas
            // angulos en radios crecientes.
            GraphNode& kid = g.nodes[kids[k]];
            if (layout_no_collision(g, kids[k], px, py, min_dist)) {
                kid.x = px; kid.y = py;
            } else {
                float ox, oy;
                if (find_collision_free_slot(
                        g, kids[k], cx, cy, min_dist, seed,
                        neighbor_radii, n_neighbor_radii, &ox, &oy)) {
                    kid.x = ox; kid.y = oy;
                } else {
                    kid.x = px; kid.y = py;  // ultimo recurso: solape
                }
            }
            kid.vx = kid.vy = 0.0f;
            ++placed_neighbor;
            continue;
        }
    }
    // ----- Pase 3: place orphans sin anchor (camera o parking lot) -----
    for (int i : orphans_no_anchor) {
        GraphNode& n = g.nodes[i];
        if (use_camera) {
            // Sin vecino → colocar dentro de la camara con ring placement.
@@ -875,10 +941,29 @@ static void render_context_menu() {
        } else {
            for (const auto& s : specs) {
                if (ImGui::MenuItem(s.name.c_str())) {
-                    char job_id[64];
+                    if (s.params.empty()) {
-                    bool ok = ge::jobs_submit(s.id.c_str(), sql_id, lbl,
+                        // Sin params editables: submit directo, comportamiento
-                                              "{}", job_id, sizeof(job_id));
+                        // historico — un click y a correr.
-                    if (ok) g_app.panel_jobs = true;
+                        char job_id[64];
                        bool ok = ge::jobs_submit(s.id.c_str(), sql_id, lbl,
                                                  "{}", job_id, sizeof(job_id));
                        if (ok) g_app.panel_jobs = true;
                    } else {
                        // Abrir ventana de configuracion. Inicializar
                        // buffers con los defaults del manifest.
                        g_app.enr_modal_id          = s.id;
                        g_app.enr_modal_node_id     = sql_id;
                        g_app.enr_modal_node_label  = lbl ? lbl : "";
                        g_app.enr_modal_param_bufs.clear();
                        g_app.enr_modal_param_bufs.resize(s.params.size());
                        for (size_t i = 0; i < s.params.size(); ++i) {
                            const std::string& dv = s.params[i].default_value;
                            auto& buf = g_app.enr_modal_param_bufs[i];
                            buf.assign(256, '\0');
                            std::snprintf(buf.data(), buf.size(), "%s", dv.c_str());
                        }
                        g_app.enr_window_open = true;
                    }
                }
                if (!s.description.empty() && ImGui::IsItemHovered()) {
                    ImGui::SetTooltip("%s", s.description.c_str());
@@ -891,6 +976,171 @@ static void render_context_menu() {
    ImGui::EndPopup();
 }
 // ----------------------------------------------------------------------------
 // Modal: configurar parametros de enricher antes de lanzar el job
 // ----------------------------------------------------------------------------
 // Se invoca desde el context menu (Run enricher → click). Si el enricher
 // declara `params` en su manifest, en lugar de submitear directamente,
 // llenamos el AppState (ver bloque `enr_modal_*`) y aqui renderizamos el
 // dialogo. El usuario ajusta valores y al pulsar Run construimos el
 // JSON `{ "param": value, ... }` y lo pasamos a `jobs_submit`.
 // Escapes `s` so it can be embedded between the quotes of a JSON string
 // literal. Double quote, backslash, newline, carriage return and tab get
 // their two-character escapes; any other byte below 0x20 is emitted as a
 // \u00XX sequence; every remaining byte is copied through unchanged.
 // The surrounding quotes are NOT added — the caller writes them.
 static std::string json_escape_str(const std::string& s) {
    std::string escaped;
    escaped.reserve(s.size() + 8);
    for (size_t pos = 0; pos < s.size(); ++pos) {
        const unsigned char byte = static_cast<unsigned char>(s[pos]);
        if (byte == '"') {
            escaped.append("\\\"");
        } else if (byte == '\\') {
            escaped.append("\\\\");
        } else if (byte == '\n') {
            escaped.append("\\n");
        } else if (byte == '\r') {
            escaped.append("\\r");
        } else if (byte == '\t') {
            escaped.append("\\t");
        } else if (byte < 0x20) {
            // Remaining control characters have no short form in JSON.
            char hex[8];
            std::snprintf(hex, sizeof(hex), "\\u%04x", byte);
            escaped.append(hex);
        } else {
            escaped.push_back(s[pos]);
        }
    }
    return escaped;
 }
 // Starts one label/input row inside a 2-column ImGui table (the caller must
 // already be between BeginTable and EndTable). The label is drawn in the
 // left column, vertically aligned with the frame of the input; the cursor is
 // then moved to the right column and the next item is told to use the whole
 // available width, so the caller only has to emit the input widget itself.
 static void labeled_row_begin(const char* label) {
    ImGui::TableNextRow();
    // Left column: the label text.
    ImGui::TableNextColumn();
    ImGui::AlignTextToFramePadding();
    ImGui::TextUnformatted(label);
    // Right column: the widget emitted next by the caller fills the column.
    ImGui::TableNextColumn();
    ImGui::SetNextItemWidth(-FLT_MIN);
 }
 // Renders the non-modal "Run enricher" configuration window.
 //
 // The window is opened from the context menu when the chosen enricher
 // declares `params` in its manifest. It shows one label/input row per
 // parameter, editing the text buffers in g_app.enr_modal_param_bufs. When
 // "Run" is pressed it builds the typed JSON object `{ "param": value, ... }`
 // and submits the job through ge::jobs_submit. Both "Run" and "Cancel" close
 // the window.
 static void render_enricher_config_window() {
    if (!g_app.enr_window_open) return;
    ImGui::SetNextWindowSize(ImVec2(420, 0), ImGuiCond_FirstUseEver);
    if (!ImGui::Begin("Run enricher", &g_app.enr_window_open,
                       ImGuiWindowFlags_NoCollapse)) {
        ImGui::End();
        return;
    }
    const ge::EnricherSpec* spec = ge::enricher_by_id(g_app.enr_modal_id.c_str());
    if (!spec) {
        ImGui::TextDisabled("(enricher no encontrado)");
        ImGui::End();
        return;
    }
    ImGui::Text("%s", spec->name.c_str());
    if (!spec->description.empty()) {
        ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.7f, 0.7f, 0.7f, 1.0f));
        ImGui::TextWrapped("%s", spec->description.c_str());
        ImGui::PopStyleColor();
    }
    ImGui::Separator();
    ImGui::TextDisabled("Node: %s", g_app.enr_modal_node_label.c_str());
    ImGui::Spacing();
    // Make sure there is one buffer per parameter — a manifest may have been
    // reloaded with more params than were filled in when the window opened.
    if (g_app.enr_modal_param_bufs.size() < spec->params.size()) {
        g_app.enr_modal_param_bufs.resize(spec->params.size());
    }
    // Size every buffer up front (zero-filled, hence NUL-terminated) so that
    // both the table below and the "Run" handler can read buf.data() safely,
    // even on a frame where the table itself is not rendered.
    for (size_t i = 0; i < spec->params.size(); ++i) {
        auto& buf = g_app.enr_modal_param_bufs[i];
        if (buf.size() < 256) buf.resize(256, '\0');
    }
    if (ImGui::BeginTable("##enr_params", 2,
                           ImGuiTableFlags_SizingStretchProp |
                           ImGuiTableFlags_NoBordersInBody)) {
        ImGui::TableSetupColumn("name",  ImGuiTableColumnFlags_WidthFixed, 110.0f);
        ImGui::TableSetupColumn("value", ImGuiTableColumnFlags_WidthStretch);
        for (size_t i = 0; i < spec->params.size(); ++i) {
            const auto& p = spec->params[i];
            auto& buf = g_app.enr_modal_param_bufs[i];
            ImGui::PushID((int)i);
            labeled_row_begin(p.name.c_str());
            // The text buffer is the single source of truth: typed widgets
            // parse it every frame and write it back only when edited.
            const std::string& t = p.type;
            if (t == "int") {
                int v = std::atoi(buf.data());
                if (ImGui::InputInt("##v", &v, 1, 10)) {
                    std::snprintf(buf.data(), buf.size(), "%d", v);
                }
            } else if (t == "float" || t == "double" || t == "number") {
                float v = (float)std::atof(buf.data());
                if (ImGui::InputFloat("##v", &v)) {
                    std::snprintf(buf.data(), buf.size(), "%g", v);
                }
            } else if (t == "bool") {
                bool v = (std::strcmp(buf.data(), "true") == 0 ||
                           std::strcmp(buf.data(), "1") == 0);
                if (ImGui::Checkbox("##v", &v)) {
                    std::snprintf(buf.data(), buf.size(), "%s", v ? "true" : "false");
                }
            } else {
                ImGui::InputText("##v", buf.data(), buf.size());
            }
            if (!p.description.empty() && ImGui::IsItemHovered()) {
                ImGui::SetTooltip("%s", p.description.c_str());
            }
            ImGui::PopID();
        }
        ImGui::EndTable();
    }
    ImGui::Separator();
    if (ImGui::Button("Run", ImVec2(100, 0))) {
        // Build the JSON object `{ "name": value, ... }` according to the
        // declared type of each parameter.
        std::string j = "{";
        for (size_t i = 0; i < spec->params.size(); ++i) {
            const auto& p = spec->params[i];
            const auto& buf = g_app.enr_modal_param_bufs[i];
            if (i) j += ",";
            j += "\"";
            j += json_escape_str(p.name);
            j += "\":";
            if (p.type == "int") {
                int v = std::atoi(buf.data());
                char b[32]; std::snprintf(b, sizeof(b), "%d", v);
                j += b;
            } else if (p.type == "float" || p.type == "double" || p.type == "number") {
                const double v = std::atof(buf.data());
                if (std::isfinite(v)) {
                    // %.10g (same precision the inspector uses) keeps manifest
                    // defaults that plain %g would cut to 6 significant digits.
                    char b[64]; std::snprintf(b, sizeof(b), "%.10g", v);
                    j += b;
                } else {
                    // JSON has no literal for inf/nan; emit null so the
                    // payload stays parseable.
                    j += "null";
                }
            } else if (p.type == "bool") {
                bool v = (std::strcmp(buf.data(), "true") == 0 ||
                           std::strcmp(buf.data(), "1") == 0);
                j += v ? "true" : "false";
            } else {
                j += "\"";
                j += json_escape_str(buf.data());
                j += "\"";
            }
        }
        j += "}";
        char job_id[64];
        bool ok = ge::jobs_submit(spec->id.c_str(),
                                   g_app.enr_modal_node_id.c_str(),
                                   g_app.enr_modal_node_label.c_str(),
                                   j.c_str(), job_id, sizeof(job_id));
        // Surface the jobs panel only when the submission was accepted.
        if (ok) g_app.panel_jobs = true;
        g_app.enr_window_open = false;
    }
    ImGui::SameLine();
    if (ImGui::Button("Cancel", ImVec2(100, 0))) {
        g_app.enr_window_open = false;
    }
    ImGui::End();
 }
 // ----------------------------------------------------------------------------
 // Label callback
 // ----------------------------------------------------------------------------
@@ -1742,6 +1992,9 @@ static void render() {
    ImGui::SetNextWindowSize(ImVec2(520.0f, 720.0f), ImGuiCond_FirstUseEver);
    ge::chat_render(&g_app.panel_chat);
    // Enricher config window (abierto desde context menu Run enricher).
    render_enricher_config_window();
    g_first_render = false;
 }
@@ -0,0 +1,37 @@
 """Trampoline para invocar enrichers desde tests.
 El Python embebido de Windows (`python-embed`) ignora `PYTHONPATH` por
 diseno — el control de sys.path lo lleva el fichero `python312._pth`.
 Para inyectar el stub `requests` de tests sin tocar ese fichero, los
 tests llaman a este runner en vez de a `run.py` directamente:
    python _runner.py <run.py>
 El runner anade `$_STUB_PATHS` al frente de `sys.path` y ejecuta el
 script objetivo como si hubiese sido invocado directamente.
 """
 from __future__ import annotations
 import os
 import runpy
 import sys
 def main() -> int:
    """Put the stub directories at the front of ``sys.path`` and run the target.

    Reads ``_STUB_PATHS`` (entries separated by ``os.pathsep``) and prepends
    every entry that is not already on ``sys.path``, keeping the listed
    order: the first entry ends up with the highest import priority, as it
    would in ``PYTHONPATH``. Then rewrites ``sys.argv`` so the target script
    sees itself as ``argv[0]`` and executes it with ``__name__`` set to
    ``"__main__"``.

    Returns 2 when no script argument was given, 0 once the script finishes.
    """
    stub_paths = os.environ.get("_STUB_PATHS", "")
    fresh: list = []
    for p in stub_paths.split(os.pathsep):
        if p and p not in sys.path and p not in fresh:
            fresh.append(p)
    # A single slice assignment keeps the listed order; inserting one by one
    # at index 0 would leave the last entry first.
    sys.path[:0] = fresh
    if len(sys.argv) < 2:
        sys.stderr.write("usage: _runner.py <script>\n")
        return 2
    target = sys.argv[1]
    # Drop the runner itself from argv so the script sees its own arguments.
    sys.argv = [target] + sys.argv[2:]
    runpy.run_path(target, run_name="__main__")
    return 0
 if __name__ == "__main__":
    raise SystemExit(main())
@@ -23,24 +23,106 @@ from pathlib import Path
 import pytest
 REGISTRY_ROOT = Path(__file__).resolve().parents[5]
 APP_DIR_SRC   = Path(__file__).resolve().parents[1]   # graph_explorer/
 ENRICHERS_DIR = APP_DIR_SRC / "enrichers"
 TESTS_DIR     = Path(__file__).resolve().parent
 STUBS_DIR     = TESTS_DIR / "_stubs"
-PYTHON_BIN    = REGISTRY_ROOT / "python" / ".venv" / "bin" / "python3"
+
 # Los enrichers viven en `<app>/enrichers/` en el repo dev y en
 # `<app>/assets/enrichers/` en la carpeta portable de Windows
 # (convencion `assets/` desde el ADR de feb-2026). Detectar cual
 # existe y usar ese.
 def _resolve_enrichers_dir() -> Path:
    """Return the enrichers directory for whichever app layout is present.

    Checks ``<app>/enrichers`` first and ``<app>/assets/enrichers`` second,
    returning the first one that is an existing directory. When neither
    exists the first path is returned anyway, so later error messages match
    the dev layout.
    """
    dev_layout = APP_DIR_SRC / "enrichers"
    portable_layout = APP_DIR_SRC / "assets" / "enrichers"
    existing = (d for d in (dev_layout, portable_layout) if d.is_dir())
    return next(existing, dev_layout)
 ENRICHERS_DIR = _resolve_enrichers_dir()
 def _resolve_registry_root() -> Path:
    """Locate the registry root by walking up from the app directory.

    Starting at ``APP_DIR_SRC`` and looking at no more than eight levels
    (the app directory plus up to seven ancestors), returns the first
    directory that contains ``cmd/fn/main.go`` or ``registry.db``.

    When no marker is found (for example in a portable deployment that ships
    without the registry) the app directory itself is returned as a
    pseudo-root.
    """
    markers = (Path("cmd") / "fn" / "main.go", Path("registry.db"))
    levels = [APP_DIR_SRC, *APP_DIR_SRC.parents][:8]
    for level in levels:
        if any((level / marker).exists() for marker in markers):
            return level
    # No registry above us: fall back to the app directory.
    return APP_DIR_SRC
 REGISTRY_ROOT = _resolve_registry_root()
 def _resolve_python_bin() -> Path:
    """Elige el Python con el que ejecutar los enrichers.
    Prioridad (cubre Linux/WSL dev y Windows portable instalado):
      1. $FN_TEST_PYTHON                                      env override
      2. <app>/assets/runtime/python/python.exe               (Windows portable, solo Windows)
      3. <app>/runtime/python/python.exe                      (legacy, solo Windows)
      4. <registry>/python/.venv/bin/python3                  (WSL dev venv)
      5. sys.executable                                       (whatever runs pytest)
    Los candidatos `python.exe` solo se aceptan si corremos en Windows
    nativo. En WSL/Linux pueden existir vendored en el repo (los
    distribuibles), pero no son ejecutables en este OS.
    """
    env = os.environ.get("FN_TEST_PYTHON")
    if env and Path(env).exists():
        return Path(env)
    is_windows = sys.platform.startswith("win")
    cands: list[Path] = []
    if is_windows:
        cands += [
            APP_DIR_SRC / "assets" / "runtime" / "python" / "python.exe",
            APP_DIR_SRC / "runtime" / "python" / "python.exe",
        ]
    cands += [REGISTRY_ROOT / "python" / ".venv" / "bin" / "python3"]
    for c in cands:
        if c.exists():
            return c
    return Path(sys.executable)
 PYTHON_BIN = _resolve_python_bin()
 def stub_requests(tmp_path: Path, plan: dict) -> dict:
    """Write the response plan to disk and return the env that enables the stub.

    The plan is serialized as JSON to ``tmp_path / "_stub_plan.json"``. The
    returned mapping carries three variables:

      - ``PYTHONPATH``: ``STUBS_DIR`` placed ahead of any ``PYTHONPATH``
        already present in the environment.
      - ``_STUB_PATHS``: ``STUBS_DIR`` again, for ``_runner.py`` to read.
      - ``_STUB_REQUESTS_PLAN``: path of the plan file just written.

    ``plan`` accepts ``default`` and/or ``match`` (a list of
    ``{contains, status, text}``).
    """
    plan_file = tmp_path / "_stub_plan.json"
    plan_file.write_text(json.dumps(plan), encoding="utf-8")
    # Append the inherited PYTHONPATH only when it is non-empty: a trailing
    # separator would create an empty entry, which Python resolves to the
    # current working directory.
    python_path = str(STUBS_DIR)
    inherited = os.environ.get("PYTHONPATH", "")
    if inherited:
        python_path += os.pathsep + inherited
    return {
        "PYTHONPATH": python_path,
        "_STUB_PATHS": str(STUBS_DIR),
        "_STUB_REQUESTS_PLAN": str(plan_file),
    }
@@ -189,17 +271,23 @@ def run_enricher(enricher_id: str, ctx: dict, *, env: dict | None = None,
                 timeout: int = 30) -> tuple[int, dict | None, str]:
    """Lanza enrichers/<id>/run.py con el wire protocol estandar.
    Usa siempre el trampoline `_runner.py` para que el stub de
    requests se inyecte tanto con PYTHONPATH (Python normal) como con
    `_STUB_PATHS` (Python embebido de Windows que ignora PYTHONPATH).
    Returns: (exit_code, stdout_json_or_None, stderr_text)
    """
    run_py = ENRICHERS_DIR / enricher_id / "run.py"
    assert run_py.exists(), f"no existe {run_py}"
    runner = TESTS_DIR / "_runner.py"
    assert runner.exists(), f"no existe {runner}"
    full_env = os.environ.copy()
    if env:
        full_env.update(env)
    proc = subprocess.run(
-        [str(PYTHON_BIN), str(run_py)],
+        [str(PYTHON_BIN), str(runner), str(run_py)],
        input=json.dumps(ctx),
        capture_output=True,
        text=True,
@@ -12,7 +12,9 @@ from __future__ import annotations
 import json
 import os
 import shutil
 import subprocess
 import sys
 from pathlib import Path
 import pytest
@@ -130,12 +132,17 @@ def test_python_dummy_enricher_obeys_wire_protocol(tmp_path):
 # Wire protocol — Bash (la ruta nueva)
 # ---------------------------------------------------------------------------
@pytest.mark.skipif(
    sys.platform.startswith("win") or not shutil.which("bash"),
    reason="test bash-only — saltado en Windows (el bash de WSL no acepta "
           "rutas Windows nativas) y en sistemas sin bash",
 )
 def test_bash_dummy_enricher_obeys_wire_protocol(tmp_path):
    enr = _write_dummy_enricher(tmp_path, eid="dummy_sh", lang="bash")
    ctx = json.dumps({"node_id": "n1", "ops_db_path": "", "params": {}})
    proc = subprocess.run(
-        ["/bin/bash", str(enr / "run.sh")],
+        [shutil.which("bash"), str(enr / "run.sh")],
        input=ctx, capture_output=True, text=True, timeout=10,
    )
    assert proc.returncode == 0, proc.stderr
@@ -31,10 +31,10 @@ def test_extract_links_creates_url_nodes(ops_db, app_dir, registry_root):
    # 2) Create the Url node (formerly Webpage) whose metadata.markdown_path points at the cache.
    make_node(ops_db, node_id="w1", name="demo",
-              type_ref="Webpage", metadata={"markdown_path": str(rel)})
+              type_ref="Url", metadata={"markdown_path": str(rel)})
    ctx = base_ctx(ops_db=ops_db, app_dir=app_dir, registry_root=registry_root,
-                   node_id="w1", node_name="demo", node_type="Webpage",
+                   node_id="w1", node_name="demo", node_type="Url",
                   metadata={"markdown_path": str(rel)})
    rc, out, err = run_enricher("extract_links", ctx)
@@ -54,9 +54,9 @@ def test_extract_links_creates_url_nodes(ops_db, app_dir, registry_root):
 def test_extract_links_without_markdown_path_errors(ops_db, app_dir,
                                                     registry_root):
    make_node(ops_db, node_id="w1", name="demo",
-              type_ref="Webpage", metadata={})
+              type_ref="Url", metadata={})
    ctx = base_ctx(ops_db=ops_db, app_dir=app_dir, registry_root=registry_root,
-                   node_id="w1", node_name="demo", node_type="Webpage")
+                   node_id="w1", node_name="demo", node_type="Url")
    rc, out, err = run_enricher("extract_links", ctx)
    assert rc != 0, "deberia fallar sin markdown_path"
    assert out is not None
@@ -27,9 +27,9 @@ def test_extract_iocs_creates_typed_entities(ops_db, app_dir, registry_root):
    rel = md_path.relative_to(app_dir)
    make_node(ops_db, node_id="w1", name="report",
-              type_ref="Webpage", metadata={"markdown_path": str(rel)})
+              type_ref="Url", metadata={"markdown_path": str(rel)})
    ctx = base_ctx(ops_db=ops_db, app_dir=app_dir, registry_root=registry_root,
-                   node_id="w1", node_name="report", node_type="Webpage",
+                   node_id="w1", node_name="report", node_type="Url",
                   metadata={"markdown_path": str(rel)})
    rc, out, err = run_enricher("extract_text_entities", ctx)
@@ -38,7 +38,7 @@ def test_extract_iocs_creates_typed_entities(ops_db, app_dir, registry_root):
    assert out["entities_added"] >= 3, out
    types = {e["type_ref"] for e in list_entities(ops_db)
-             if e["type_ref"] != "Webpage"}
+             if e["type_ref"] != "Url"}
    # No exigimos todos los tipos — depende de que extract_iocs cubra cada
    # patron — pero al menos Email y CVE deberian estar.
    assert "Email" in types, types
@@ -51,9 +51,9 @@ def test_extract_iocs_creates_typed_entities(ops_db, app_dir, registry_root):
 def test_extract_iocs_without_markdown_errors(ops_db, app_dir, registry_root):
    make_node(ops_db, node_id="w1", name="empty",
-              type_ref="Webpage", metadata={})
+              type_ref="Url", metadata={})
    ctx = base_ctx(ops_db=ops_db, app_dir=app_dir, registry_root=registry_root,
-                   node_id="w1", node_name="empty", node_type="Webpage")
+                   node_id="w1", node_name="empty", node_type="Url")
    rc, out, err = run_enricher("extract_text_entities", ctx)
    assert rc != 0
    assert out and "missing markdown_path" in (out.get("error") or "")
@@ -42,9 +42,9 @@ def test_fetch_webpage_creates_domain_and_caches(ops_db, app_dir, registry_root,
    assert out["entities_added"] == 1   # Domain
    assert out["relations_added"] == 1  # BELONGS_TO
-    # El nodo Url se promueve a Webpage.
+    # El nodo Url permanece como Url (Webpage se unifico en Url).
    e = get_entity(ops_db, "u1")
-    assert e["type_ref"] == "Webpage", e
+    assert e["type_ref"] == "Url", e
    assert e["metadata"]["title"] == "Acme Demo"
    assert e["metadata"]["status_code"] == 200
@@ -109,6 +109,12 @@ def test_resolver_uses_fn_python_env_var(tmp_path):
    assert str(fake) in line, line
@pytest.mark.skipif(
    shutil.which("bash") is None or
    not (APP_DIR_SRC / "tools" / "freeze_python_runtime.sh").exists(),
    reason="bash o tools/freeze_python_runtime.sh no disponible "
           "(esperado en deploy portable)",
 )
 def test_freeze_script_is_idempotent(tmp_path):
    """Llamadas consecutivas con mismas deps no rehacen el runtime."""
    fake_app = tmp_path / "app"
@@ -23,6 +23,17 @@ from conftest import APP_DIR_SRC, REGISTRY_ROOT
 SCRIPT = APP_DIR_SRC / "tools" / "vendor_enricher_python.sh"
 # El script vendor es bash-only y vive en el repo dev. En la carpeta
 # portable de Windows no esta presente; ademas necesitaria un bash
 # real para ejecutarse. Saltamos toda la suite si:
 #   - no encontramos `bash` en PATH (Windows), o
 #   - el script no existe (deploy portable sin tools/).
 pytestmark = pytest.mark.skipif(
    shutil.which("bash") is None or not SCRIPT.exists(),
    reason="bash o tools/vendor_enricher_python.sh no disponible "
           "(esperado en deploy portable)",
 )
 def _make_enricher_dir(tmp_path: Path, manifest: str) -> Path:
    enr = tmp_path / "test_enricher"
@@ -874,173 +874,234 @@ void views_inspector(AppState& app) {
    bool any_change = false;
    // ---- Identidad ----
    // Layout label-izquierda / input-derecha via 2-col table. El label
    // alineado al frame del input y el input estirado al ancho restante.
    ImGui::TextUnformatted("Identity");
    ImGui::Separator();
    if (ImGui::InputText("name", app.insp_name_buf, sizeof(app.insp_name_buf)))
        any_change = true;
-    // type combo
+    if (ImGui::BeginTable("##insp_id", 2,
-    {
+                           ImGuiTableFlags_SizingStretchProp |
-        int cur = -1;
+                           ImGuiTableFlags_NoBordersInBody)) {
-        for (size_t i = 0; i < app.insp_type_options.size(); ++i) {
+        ImGui::TableSetupColumn("k", ImGuiTableColumnFlags_WidthFixed, 90.0f);
-            if (app.insp_type_options[i] == app.insp_type_buf) { cur = (int)i; break; }
+        ImGui::TableSetupColumn("v", ImGuiTableColumnFlags_WidthStretch);
-        }
+
-        // Si el tipo no esta en el cache (raro), mostrar como tal y permitir
+        // name
-        // introducirlo via input. Combo simple aqui.
+        ImGui::TableNextRow(); ImGui::TableNextColumn();
-        if (ImGui::BeginCombo("type", app.insp_type_buf)) {
+        ImGui::AlignTextToFramePadding(); ImGui::TextUnformatted("name");
        ImGui::TableNextColumn(); ImGui::SetNextItemWidth(-FLT_MIN);
        if (ImGui::InputText("##name", app.insp_name_buf,
                              sizeof(app.insp_name_buf)))
            any_change = true;
        // type combo
        ImGui::TableNextRow(); ImGui::TableNextColumn();
        ImGui::AlignTextToFramePadding(); ImGui::TextUnformatted("type");
        ImGui::TableNextColumn(); ImGui::SetNextItemWidth(-FLT_MIN);
        {
            int cur = -1;
            for (size_t i = 0; i < app.insp_type_options.size(); ++i) {
-                bool is_sel = (int)i == cur;
+                if (app.insp_type_options[i] == app.insp_type_buf) {
-                if (ImGui::Selectable(app.insp_type_options[i].c_str(), is_sel)) {
+                    cur = (int)i; break;
                    copy_to_buf(app.insp_type_buf, sizeof(app.insp_type_buf),
                                  app.insp_type_options[i]);
                    any_change = true;
                }
                if (is_sel) ImGui::SetItemDefaultFocus();
            }
-            ImGui::EndCombo();
+            if (ImGui::BeginCombo("##type", app.insp_type_buf)) {
                for (size_t i = 0; i < app.insp_type_options.size(); ++i) {
                    bool is_sel = (int)i == cur;
                    if (ImGui::Selectable(app.insp_type_options[i].c_str(), is_sel)) {
                        copy_to_buf(app.insp_type_buf, sizeof(app.insp_type_buf),
                                      app.insp_type_options[i]);
                        any_change = true;
                    }
                    if (is_sel) ImGui::SetItemDefaultFocus();
                }
                ImGui::EndCombo();
            }
        }
        // status combo
        ImGui::TableNextRow(); ImGui::TableNextColumn();
        ImGui::AlignTextToFramePadding(); ImGui::TextUnformatted("status");
        ImGui::TableNextColumn(); ImGui::SetNextItemWidth(-FLT_MIN);
        if (ImGui::Combo("##status", &app.insp_status_idx,
                           k_status_options, k_status_count))
            any_change = true;
        ImGui::EndTable();
    }
-    // status combo
+
-    if (ImGui::Combo("status", &app.insp_status_idx,
+    // description — multiline va debajo de su label, ocupando todo el
-                       k_status_options, k_status_count))
+    // ancho. Con 60 px de alto entra ~3 lineas; el usuario hace scroll
-        any_change = true;
+    // dentro del input para textos mas largos.
-    // description multiline
+    ImGui::Spacing();
    ImGui::TextUnformatted("description");
    if (app.insp_desc_buf.empty()) ensure_desc_buf(app.insp_desc_buf, 4096);
-    if (ImGui::InputTextMultiline("description",
+    if (ImGui::InputTextMultiline("##desc",
                                    app.insp_desc_buf.data(),
                                    app.insp_desc_buf.size(),
                                    ImVec2(-FLT_MIN, 60.0f)))
        any_change = true;
    // ---- Schema fields + Extras ----
    // Misma idea que Identity: 2-col table con label izquierda, input
    // derecha. Para extras añadimos un boton trash inline; para URLs un
    // boton Open. Ambos son SmallButton tras un input mas estrecho.
    if (!app.insp_field_keys.empty()) {
        ImGui::Spacing();
        ImGui::TextUnformatted("Fields");
        ImGui::Separator();
        const EntitySpec* spec = find_entity_spec(app.parsed_types,
                                                    app.insp_type_buf);
        for (size_t i = 0; i < app.insp_field_keys.size(); ++i) {
            const std::string& key = app.insp_field_keys[i];
            std::string& val       = app.insp_field_values[i];
            bool is_extra          = app.insp_is_extra[i] != 0;
            ImGui::PushID((int)i);
-            // Encuentra la FieldSpec si es del schema.
+        if (ImGui::BeginTable("##insp_fields", 2,
-            const FieldSpec* fs = nullptr;
+                               ImGuiTableFlags_SizingStretchProp |
-            if (!is_extra && spec) {
+                               ImGuiTableFlags_NoBordersInBody)) {
-                for (const auto& f : spec->fields) {
+            ImGui::TableSetupColumn("k", ImGuiTableColumnFlags_WidthFixed, 90.0f);
-                    if (f.name == key) { fs = &f; break; }
+            ImGui::TableSetupColumn("v", ImGuiTableColumnFlags_WidthStretch);
                }
            }
-            FieldKind kind = fs ? fs->kind : FK_STRING;
+            for (size_t i = 0; i < app.insp_field_keys.size(); ++i) {
-            std::string label = key;
+                const std::string& key = app.insp_field_keys[i];
-            if (fs && fs->required) label += " *";
+                std::string& val       = app.insp_field_values[i];
-            if (is_extra) label = "[extra] " + key;
+                bool is_extra          = app.insp_is_extra[i] != 0;
                ImGui::PushID((int)i);
-            char buf[1024];
+                // Encuentra la FieldSpec si es del schema.
-            size_t k = std::min(sizeof(buf) - 1, val.size());
+                const FieldSpec* fs = nullptr;
-            std::memcpy(buf, val.data(), k);
+                if (!is_extra && spec) {
-            buf[k] = 0;
+                    for (const auto& f : spec->fields) {
-
+                        if (f.name == key) { fs = &f; break; }
            bool changed = false;
            switch (kind) {
                case FK_BOOL: {
                    bool b = (val == "true" || val == "1");
                    if (ImGui::Checkbox(label.c_str(), &b)) {
                        val = b ? "true" : "false";
                        changed = true;
                    }
                    break;
                }
-                case FK_INT: {
+
-                    int n = std::atoi(val.c_str());
+                FieldKind kind = fs ? fs->kind : FK_STRING;
-                    if (ImGui::InputInt(label.c_str(), &n)) {
+
-                        char nb[32]; std::snprintf(nb, sizeof(nb), "%d", n);
+                // Label izquierdo. Marca `*` si es required, prefijo
-                        val = nb;
+                // [extra] si es campo libre añadido por el usuario.
-                        changed = true;
+                ImGui::TableNextRow(); ImGui::TableNextColumn();
-                    }
+                ImGui::AlignTextToFramePadding();
-                    break;
+                if (is_extra) {
                    ImGui::PushStyleColor(ImGuiCol_Text,
                        ImVec4(0.65f, 0.65f, 0.50f, 1.0f));
                    ImGui::Text("%s", key.c_str());
                    ImGui::PopStyleColor();
                } else if (fs && fs->required) {
                    ImGui::Text("%s *", key.c_str());
                } else {
                    ImGui::TextUnformatted(key.c_str());
                }
-                case FK_FLOAT: {
+
-                    double d = std::atof(val.c_str());
+                // Input derecha. Reserva espacio para el trailing button
-                    if (ImGui::InputDouble(label.c_str(), &d, 0.0, 0.0, "%.6g")) {
+                // cuando aplique (URL Open, extras trash).
-                        char nb[64]; std::snprintf(nb, sizeof(nb), "%.10g", d);
+                ImGui::TableNextColumn();
-                        val = nb;
+                bool needs_trail_btn = is_extra ||
-                        changed = true;
+                    (kind == FK_URL && !val.empty() &&
-                    }
+                     (val.rfind("http://", 0) == 0 ||
-                    break;
+                      val.rfind("https://", 0) == 0));
-                }
+                ImGui::SetNextItemWidth(needs_trail_btn ? -32.0f : -FLT_MIN);
-                case FK_ENUM: {
+
-                    if (fs && !fs->enum_values.empty()) {
+                char buf[1024];
-                        int cur = -1;
+                size_t k = std::min(sizeof(buf) - 1, val.size());
-                        for (size_t e = 0; e < fs->enum_values.size(); ++e) {
+                std::memcpy(buf, val.data(), k);
-                            if (fs->enum_values[e] == val) { cur = (int)e; break; }
+                buf[k] = 0;
                bool changed = false;
                switch (kind) {
                    case FK_BOOL: {
                        bool b = (val == "true" || val == "1");
                        if (ImGui::Checkbox("##v", &b)) {
                            val = b ? "true" : "false";
                            changed = true;
                        }
-                        if (ImGui::BeginCombo(label.c_str(), val.c_str())) {
+                        break;
                    }
                    case FK_INT: {
                        int n = std::atoi(val.c_str());
                        if (ImGui::InputInt("##v", &n)) {
                            char nb[32]; std::snprintf(nb, sizeof(nb), "%d", n);
                            val = nb;
                            changed = true;
                        }
                        break;
                    }
                    case FK_FLOAT: {
                        double d = std::atof(val.c_str());
                        if (ImGui::InputDouble("##v", &d, 0.0, 0.0, "%.6g")) {
                            char nb[64]; std::snprintf(nb, sizeof(nb), "%.10g", d);
                            val = nb;
                            changed = true;
                        }
                        break;
                    }
                    case FK_ENUM: {
                        if (fs && !fs->enum_values.empty()) {
                            int cur = -1;
                            for (size_t e = 0; e < fs->enum_values.size(); ++e) {
-                                bool is_sel = (int)e == cur;
+                                if (fs->enum_values[e] == val) { cur = (int)e; break; }
-                                if (ImGui::Selectable(fs->enum_values[e].c_str(), is_sel)) {
+                            }
-                                    val = fs->enum_values[e];
+                            if (ImGui::BeginCombo("##v", val.c_str())) {
-                                    changed = true;
+                                for (size_t e = 0; e < fs->enum_values.size(); ++e) {
-                                }
+                                    bool is_sel = (int)e == cur;
-                                if (is_sel) ImGui::SetItemDefaultFocus();
+                                    if (ImGui::Selectable(fs->enum_values[e].c_str(), is_sel)) {
                                        val = fs->enum_values[e];
                                        changed = true;
                                    }
                                    if (is_sel) ImGui::SetItemDefaultFocus();
                                }
                                ImGui::EndCombo();
                            }
                        } else {
                            if (ImGui::InputText("##v", buf, sizeof(buf))) {
                                val = buf;
                                changed = true;
                            }
                            ImGui::EndCombo();
                        }
-                    } else {
+                        break;
-                        // Sin valores: tratar como string
+                    }
-                        if (ImGui::InputText(label.c_str(), buf, sizeof(buf))) {
+                    case FK_URL:
                        if (ImGui::InputText("##v", buf, sizeof(buf))) {
                            val = buf;
                            changed = true;
                        }
-                    }
+                        if (!val.empty() &&
-                    break;
+                            (val.rfind("http://", 0) == 0 || val.rfind("https://", 0) == 0)) {
-                }
+                            ImGui::SameLine();
-                case FK_URL:
+                            if (ImGui::SmallButton(TI_EXTERNAL_LINK "##url")) {
                    if (ImGui::InputText(label.c_str(), buf, sizeof(buf))) {
                        val = buf;
                        changed = true;
                    }
                    if (!val.empty() &&
                        (val.rfind("http://", 0) == 0 || val.rfind("https://", 0) == 0)) {
                        ImGui::SameLine();
                        if (ImGui::SmallButton("Open##url")) {
 #if defined(_WIN32)
-                            std::string cmd = "start \"\" \"" + val + "\"";
+                                std::string cmd = "start \"\" \"" + val + "\"";
 #else
-                            std::string cmd = "xdg-open '" + val + "' >/dev/null 2>&1 &";
+                                std::string cmd = "xdg-open '" + val + "' >/dev/null 2>&1 &";
 #endif
-                            int rc = std::system(cmd.c_str()); (void)rc;
+                                int rc = std::system(cmd.c_str()); (void)rc;
                            }
                        }
-                    }
+                        break;
-                    break;
+                    case FK_DATE:
-                case FK_DATE:
+                    case FK_STRING:
-                case FK_STRING:
+                    default:
-                default:
+                        if (ImGui::InputTextWithHint("##v",
-                    if (ImGui::InputTextWithHint(label.c_str(),
+                                                       kind == FK_DATE ? "YYYY-MM-DD" : "",
-                                                   kind == FK_DATE ? "YYYY-MM-DD" : "",
+                                                       buf, sizeof(buf))) {
-                                                   buf, sizeof(buf))) {
+                            val = buf;
-                        val = buf;
+                            changed = true;
-                        changed = true;
+                        }
-                    }
+                        break;
                    break;
            }
            if (is_extra) {
                ImGui::SameLine();
                if (ImGui::SmallButton(TI_TRASH "##rm")) {
                    app.insp_field_keys.erase(app.insp_field_keys.begin() + i);
                    app.insp_field_values.erase(app.insp_field_values.begin() + i);
                    app.insp_is_extra.erase(app.insp_is_extra.begin() + i);
                    ImGui::PopID();
                    any_change = true;
                    --i;
                    continue;
                }
                if (is_extra) {
                    ImGui::SameLine();
                    if (ImGui::SmallButton(TI_TRASH "##rm")) {
                        app.insp_field_keys.erase(app.insp_field_keys.begin() + i);
                        app.insp_field_values.erase(app.insp_field_values.begin() + i);
                        app.insp_is_extra.erase(app.insp_is_extra.begin() + i);
                        ImGui::PopID();
                        any_change = true;
                        --i;
                        continue;
                    }
                }
                if (changed) any_change = true;
                ImGui::PopID();
            }
-            if (changed) any_change = true;
+            ImGui::EndTable();
            ImGui::PopID();
        }
    }
@@ -246,6 +246,22 @@ struct AppState {
    bool                      filter_dirty          = false;  // pide reapply
    int                       filter_focus_target   = -1;     // node_idx a centrar
    char                      filter_tag_input[64]  = {};     // input de chip nuevo
    // ---- Enricher config window --------------------------------------------
    // When the user clicks an enricher with non-empty `params` in the
    // context menu, this block is filled in and a (non-modal) ImGui window
    // is opened so the values can be adjusted before the job is submitted.
    // The window is dockable and movable; closing it with the X cancels.
    // If the enricher declares no params, the job is submitted directly
    // with `{}` and this state is not used.
    // NOTE(review): the `enr_modal_` prefix on the fields below is kept even
    // though the window is described as non-modal.
    bool                      enr_window_open = false;        // window visibility
    std::string               enr_modal_id;                   // enricher.id
    std::string               enr_modal_node_id;              // sql_id of the node
    std::string               enr_modal_node_label;           // visible label
    // Editable buffer per param. Intended as a fixed size of 256 for text
    // inputs; enough for queries and short URLs. Indices are aligned with
    // EnricherSpec::params of the selected enricher.
    // NOTE(review): the 256-byte size is not enforced by this type; it is
    // presumably applied where the buffers are allocated — confirm.
    std::vector<std::vector<char>> enr_modal_param_bufs;
 };
 // Toolbar superior (Open file, Layout selector, Filters..., Fit, Save layout).