feat(enrichers): vendoring de funciones Python por enricher (issue 0033b)
Cada enricher con `lang: python` y `uses_functions` no vacio ahora
puede empaquetar las funciones del registry que necesita en
`<enricher>/_vendored/`. El run.py importa de ahi en lugar de
`<registry_root>/python/functions/`, lo que hace al binario
distribuible sin dependencia de un fn_registry montado.
Cambios:
1. tools/vendor_enricher_python.sh
- Lee `uses_functions` del manifest (filtrando IDs `*_py_*`).
- Resuelve `file_path` desde registry.db.
- Copia recursivamente con expansion transitiva: si un fichero
vendorizado importa siblings del mismo dominio, los siblings
tambien se copian (resuelve el caso `extract_iocs.py` que
importa 7 modulos hermanos).
- Genera `.vendor.lock` con `<id> <sha256> <src_path>` por
funcion declarada para auditoria.
- Idempotente — si todos los hashes coinciden, no rehace nada.
2. Manifests actualizados con `uses_functions`:
- fetch_webpage: normalize_url + html_to_markdown
- extract_links: extract_urls
- extract_text_entities: extract_iocs
3. run.py de los 3 enrichers afectados: importan de `_vendored/`
si existe, fallback a `<registry_root>/python/functions/` en
modo dev (mantiene los tests pytest funcionando).
4. app.md: anade `cryptography` a python_runtime_deps porque el
blob `cybersecurity.cybersecurity` lo importa al top.
5. Tests:
- test_vendor_script.py — 6 tests del script: manifest sin
uses_functions, layout correcto, transitive siblings, lock con
SHA256, idempotencia, modulos importables en aislamiento.
- 16 tests de enrichers existentes pasan via vendoring (no usan
registry_root porque _vendored/ tiene prioridad).
6. Issue 0033b movido a issues/completed/.
Tests: 32/32 verde (16 enrichers + 6 dispatcher + 4 runtime + 6
vendor).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -30,6 +30,7 @@ python_runtime_deps:
|
|||||||
- requests
|
- requests
|
||||||
- certifi
|
- certifi
|
||||||
- urllib3
|
- urllib3
|
||||||
|
- cryptography
|
||||||
---
|
---
|
||||||
|
|
||||||
## Arquitectura
|
## Arquitectura
|
||||||
|
|||||||
@@ -4,5 +4,7 @@ description: "Lee la markdown cacheada de un Webpage (metadata.markdown_path) y
|
|||||||
applies_to: [Webpage]
|
applies_to: [Webpage]
|
||||||
emits: [Url]
|
emits: [Url]
|
||||||
relations: [LINKS_TO]
|
relations: [LINKS_TO]
|
||||||
|
uses_functions:
|
||||||
|
- extract_urls_py_cybersecurity
|
||||||
params:
|
params:
|
||||||
- { name: max_links, type: int, default: 50 }
|
- { name: max_links, type: int, default: 50 }
|
||||||
|
|||||||
@@ -69,9 +69,16 @@ def main() -> int:
|
|||||||
text = open(abs_md, "r", encoding="utf-8", errors="replace").read()
|
text = open(abs_md, "r", encoding="utf-8", errors="replace").read()
|
||||||
|
|
||||||
progress(0.45, "extracting")
|
progress(0.45, "extracting")
|
||||||
py_funcs = os.path.join(registry_root, "python", "functions")
|
# Prefiere _vendored/ (issue 0033b) si existe; si no, fallback al
|
||||||
if py_funcs not in sys.path:
|
# registry_root para modo dev local.
|
||||||
sys.path.insert(0, py_funcs)
|
vendored = os.path.join(os.path.dirname(__file__), "_vendored")
|
||||||
|
if os.path.isdir(vendored):
|
||||||
|
if vendored not in sys.path:
|
||||||
|
sys.path.insert(0, vendored)
|
||||||
|
elif registry_root:
|
||||||
|
py_funcs = os.path.join(registry_root, "python", "functions")
|
||||||
|
if py_funcs not in sys.path:
|
||||||
|
sys.path.insert(0, py_funcs)
|
||||||
from cybersecurity.cybersecurity import extract_urls # type: ignore
|
from cybersecurity.cybersecurity import extract_urls # type: ignore
|
||||||
|
|
||||||
urls = extract_urls(text)
|
urls = extract_urls(text)
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ description: "Lee la markdown cacheada de un Webpage y extrae IoCs (IPs, emails,
|
|||||||
applies_to: [Webpage]
|
applies_to: [Webpage]
|
||||||
emits: [Email, IPAddress, Domain, FileHash, CryptoWallet, CVE, MACAddress, Phone]
|
emits: [Email, IPAddress, Domain, FileHash, CryptoWallet, CVE, MACAddress, Phone]
|
||||||
relations: [EXTRACTED_FROM]
|
relations: [EXTRACTED_FROM]
|
||||||
|
uses_functions:
|
||||||
|
- extract_iocs_py_cybersecurity
|
||||||
params:
|
params:
|
||||||
- { name: types, type: string, default: "" }
|
- { name: types, type: string, default: "" }
|
||||||
- { name: max_entities, type: int, default: 200 }
|
- { name: max_entities, type: int, default: 200 }
|
||||||
|
|||||||
@@ -98,9 +98,16 @@ def main() -> int:
|
|||||||
text = open(abs_md, "r", encoding="utf-8", errors="replace").read()
|
text = open(abs_md, "r", encoding="utf-8", errors="replace").read()
|
||||||
|
|
||||||
progress(0.30, "extracting iocs")
|
progress(0.30, "extracting iocs")
|
||||||
py_funcs = os.path.join(registry_root, "python", "functions")
|
# Prefiere _vendored/ (issue 0033b) si existe; si no, fallback al
|
||||||
if py_funcs not in sys.path:
|
# registry_root para modo dev local.
|
||||||
sys.path.insert(0, py_funcs)
|
vendored = os.path.join(os.path.dirname(__file__), "_vendored")
|
||||||
|
if os.path.isdir(vendored):
|
||||||
|
if vendored not in sys.path:
|
||||||
|
sys.path.insert(0, vendored)
|
||||||
|
elif registry_root:
|
||||||
|
py_funcs = os.path.join(registry_root, "python", "functions")
|
||||||
|
if py_funcs not in sys.path:
|
||||||
|
sys.path.insert(0, py_funcs)
|
||||||
from cybersecurity.extract_iocs import extract_iocs # type: ignore
|
from cybersecurity.extract_iocs import extract_iocs # type: ignore
|
||||||
|
|
||||||
iocs = extract_iocs(text, types_list)
|
iocs = extract_iocs(text, types_list)
|
||||||
|
|||||||
@@ -4,5 +4,8 @@ description: "Descarga HTML de una URL, extrae markdown limpio (readabilipy) y g
|
|||||||
applies_to: [Url, Webpage]
|
applies_to: [Url, Webpage]
|
||||||
emits: [Domain]
|
emits: [Domain]
|
||||||
relations: [BELONGS_TO]
|
relations: [BELONGS_TO]
|
||||||
|
uses_functions:
|
||||||
|
- normalize_url_py_cybersecurity
|
||||||
|
- html_to_markdown_py_core
|
||||||
params:
|
params:
|
||||||
- { name: timeout_s, type: int, default: 15 }
|
- { name: timeout_s, type: int, default: 15 }
|
||||||
|
|||||||
@@ -38,10 +38,16 @@ def log(msg: str) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def load_registry_funcs(registry_root: str):
|
def load_registry_funcs(registry_root: str):
|
||||||
"""Anade el registry al sys.path e importa funciones que usamos."""
|
"""Importa funciones del registry. Prefiere `_vendored/` (issue 0033b);
|
||||||
py_funcs = os.path.join(registry_root, "python", "functions")
|
si no existe, fallback a `<registry_root>/python/functions/` (modo dev)."""
|
||||||
if py_funcs not in sys.path:
|
vendored = os.path.join(os.path.dirname(__file__), "_vendored")
|
||||||
sys.path.insert(0, py_funcs)
|
if os.path.isdir(vendored):
|
||||||
|
if vendored not in sys.path:
|
||||||
|
sys.path.insert(0, vendored)
|
||||||
|
elif registry_root:
|
||||||
|
py_funcs = os.path.join(registry_root, "python", "functions")
|
||||||
|
if py_funcs not in sys.path:
|
||||||
|
sys.path.insert(0, py_funcs)
|
||||||
from cybersecurity.cybersecurity import normalize_url # type: ignore
|
from cybersecurity.cybersecurity import normalize_url # type: ignore
|
||||||
from core.html_to_markdown import html_to_markdown # type: ignore
|
from core.html_to_markdown import html_to_markdown # type: ignore
|
||||||
return normalize_url, html_to_markdown
|
return normalize_url, html_to_markdown
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
id: 0001
|
id: 0001
|
||||||
title: Chat con Claude sobre el grafo
|
title: Chat con Claude sobre el grafo
|
||||||
status: pending
|
status: completed
|
||||||
priority: high
|
priority: high
|
||||||
created: 2026-04-30
|
created: 2026-04-30
|
||||||
---
|
---
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
id: 0003
|
id: 0003
|
||||||
title: Enricher web — descargar URL/dominio y extraer texto
|
title: Enricher web — descargar URL/dominio y extraer texto
|
||||||
status: pending
|
status: completed
|
||||||
priority: medium
|
priority: medium
|
||||||
created: 2026-04-30
|
created: 2026-04-30
|
||||||
---
|
---
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
id: 0026
|
id: 0026
|
||||||
title: Sistema de jobs — enrichers asincronos en background
|
title: Sistema de jobs — enrichers asincronos en background
|
||||||
status: in_progress
|
status: completed
|
||||||
priority: high
|
priority: high
|
||||||
created: 2026-05-01
|
created: 2026-05-01
|
||||||
blocks: [0027, 0028, 0029, 0030]
|
blocks: [0027, 0028, 0029, 0030]
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
id: 0027
|
id: 0027
|
||||||
title: Tipo Webpage + cache de documentos descargados
|
title: Tipo Webpage + cache de documentos descargados
|
||||||
status: pending
|
status: completed
|
||||||
priority: high
|
priority: high
|
||||||
created: 2026-05-01
|
created: 2026-05-01
|
||||||
depends_on: [0026]
|
depends_on: [0026]
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
id: 0028
|
id: 0028
|
||||||
title: Enricher fetch_webpage (MVP end-to-end)
|
title: Enricher fetch_webpage (MVP end-to-end)
|
||||||
status: pending
|
status: completed
|
||||||
priority: high
|
priority: high
|
||||||
created: 2026-05-01
|
created: 2026-05-01
|
||||||
depends_on: [0026, 0027]
|
depends_on: [0026, 0027]
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
id: 0028b
|
id: 0028b
|
||||||
title: Enrichers extract_domain, extract_links, extract_text_entities
|
title: Enrichers extract_domain, extract_links, extract_text_entities
|
||||||
status: pending
|
status: completed
|
||||||
priority: high
|
priority: high
|
||||||
created: 2026-05-01
|
created: 2026-05-01
|
||||||
depends_on: [0028]
|
depends_on: [0028]
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
id: 0031
|
id: 0031
|
||||||
title: Layout estable al recargar — auto-save, halo placement, sin fit, physics off
|
title: Layout estable al recargar — auto-save, halo placement, sin fit, physics off
|
||||||
status: in_progress
|
status: completed
|
||||||
priority: high
|
priority: high
|
||||||
created: 2026-05-01
|
created: 2026-05-01
|
||||||
related_to: [0026]
|
related_to: [0026]
|
||||||
|
|||||||
+2
-1
@@ -1,9 +1,10 @@
|
|||||||
---
|
---
|
||||||
id: 0033b
|
id: 0033b
|
||||||
title: Vendoring de funciones Python por enricher
|
title: Vendoring de funciones Python por enricher
|
||||||
status: pending
|
status: completed
|
||||||
priority: high
|
priority: high
|
||||||
created: 2026-05-02
|
created: 2026-05-02
|
||||||
|
completed: 2026-05-03
|
||||||
depends_on: [0033]
|
depends_on: [0033]
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -0,0 +1,134 @@
|
|||||||
|
"""Tests del script tools/vendor_enricher_python.sh (issue 0033b).
|
||||||
|
|
||||||
|
Verifica:
|
||||||
|
- manifest sin uses_functions Python -> no crea _vendored/.
|
||||||
|
- manifest con un uses_functions -> copia el .py + __init__.
|
||||||
|
- dep transitiva (extract_iocs importa siblings) -> copia siblings.
|
||||||
|
- .vendor.lock con SHA256 + path origen.
|
||||||
|
- Idempotencia: 2da llamada con mismo estado no rehace nada.
|
||||||
|
- Cambio en el manifest invalida el lock.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from conftest import APP_DIR_SRC, REGISTRY_ROOT
|
||||||
|
|
||||||
|
|
||||||
|
SCRIPT = APP_DIR_SRC / "tools" / "vendor_enricher_python.sh"
|
||||||
|
|
||||||
|
|
||||||
|
def _make_enricher_dir(tmp_path: Path, manifest: str) -> Path:
|
||||||
|
enr = tmp_path / "test_enricher"
|
||||||
|
enr.mkdir()
|
||||||
|
(enr / "manifest.yaml").write_text(manifest, encoding="utf-8")
|
||||||
|
(enr / "run.py").write_text("# stub\n", encoding="utf-8")
|
||||||
|
return enr
|
||||||
|
|
||||||
|
|
||||||
|
def _run_vendor(enr_dir: Path) -> subprocess.CompletedProcess:
    """Run the vendoring script on ``enr_dir`` against ``REGISTRY_ROOT``.

    The script is invoked through ``bash`` with stdout/stderr captured as text
    and a 20 second timeout. Returns the finished process so callers can check
    the exit code and output.
    """
    command = ["bash", str(SCRIPT), str(enr_dir), str(REGISTRY_ROOT)]
    return subprocess.run(command, capture_output=True, text=True, timeout=20)
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_uses_functions_does_not_create_vendored(tmp_path):
    """A manifest without ``uses_functions`` leaves no vendoring artefacts."""
    manifest = "id: x\nname: x\napplies_to: [text]\n"
    enricher = _make_enricher_dir(tmp_path, manifest)

    result = _run_vendor(enricher)

    assert result.returncode == 0, result.stderr
    # Neither the vendored tree nor the lock file may be created.
    for artefact in ("_vendored", ".vendor.lock"):
        assert not (enricher / artefact).exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_single_dep_creates_vendored_layout(tmp_path):
    """One declared function yields the package layout and a lock entry."""
    manifest = (
        "id: x\nname: x\napplies_to: [Url]\n"
        "uses_functions:\n"
        " - normalize_url_py_cybersecurity\n"
    )
    enricher = _make_enricher_dir(tmp_path, manifest)

    result = _run_vendor(enricher)
    assert result.returncode == 0, result.stderr

    # Package markers at both levels plus the copied source module.
    vendored = enricher / "_vendored"
    expected_files = (
        vendored / "__init__.py",
        vendored / "cybersecurity" / "__init__.py",
        vendored / "cybersecurity" / "cybersecurity.py",
    )
    for path in expected_files:
        assert path.exists()

    # The lock file must exist and name the declared function.
    lock_path = enricher / ".vendor.lock"
    assert lock_path.exists()
    assert "normalize_url_py_cybersecurity" in lock_path.read_text()
|
||||||
|
|
||||||
|
|
||||||
|
def test_transitive_siblings_are_copied(tmp_path):
    """Vendoring ``extract_iocs`` must also bring the sibling modules it imports."""
    manifest = (
        "id: x\nname: x\napplies_to: [Webpage]\n"
        "uses_functions:\n"
        " - extract_iocs_py_cybersecurity\n"
    )
    enricher = _make_enricher_dir(tmp_path, manifest)

    result = _run_vendor(enricher)
    assert result.returncode == 0, result.stderr

    domain_dir = enricher / "_vendored" / "cybersecurity"
    assert (domain_dir / "extract_iocs.py").exists()

    # Sibling extractor modules that extract_iocs.py is expected to pull in.
    expected_siblings = {
        "extract_ip_addresses.py",
        "extract_emails.py",
        "extract_domains.py",
        "extract_file_hashes.py",
        "extract_crypto_wallets.py",
        "extract_cve_ids.py",
        "extract_mac_addresses.py",
        "extract_phone_numbers.py",
    }
    copied = set()
    for module_path in domain_dir.glob("*.py"):
        copied.add(module_path.name)

    missing = expected_siblings.difference(copied)
    assert not missing, f"siblings no copiados: {missing}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_lock_contains_correct_sha256(tmp_path):
    """The lock records the SHA-256 of the registry source file."""
    manifest = (
        "id: x\nname: x\napplies_to: [Url]\n"
        "uses_functions:\n - normalize_url_py_cybersecurity\n"
    )
    enricher = _make_enricher_dir(tmp_path, manifest)

    result = _run_vendor(enricher)
    assert result.returncode == 0, result.stderr

    # Hash the original file in the registry, independently of the script.
    source_file = (
        REGISTRY_ROOT / "python" / "functions" / "cybersecurity" / "cybersecurity.py"
    )
    digest = hashlib.sha256()
    digest.update(source_file.read_bytes())
    expected_sha = digest.hexdigest()

    lock_text = (enricher / ".vendor.lock").read_text()
    assert expected_sha in lock_text, lock_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_idempotency_skips_when_unchanged(tmp_path):
    """A second run over unchanged inputs reports that nothing changed."""
    manifest = (
        "id: x\nname: x\napplies_to: [Url]\n"
        "uses_functions:\n - normalize_url_py_cybersecurity\n"
    )
    enricher = _make_enricher_dir(tmp_path, manifest)

    first_run = _run_vendor(enricher)
    assert first_run.returncode == 0

    second_run = _run_vendor(enricher)
    assert second_run.returncode == 0
    # The script prints this marker when the lock already matches.
    assert "sin cambios" in second_run.stdout, second_run.stdout
|
||||||
|
|
||||||
|
|
||||||
|
def test_vendored_module_can_be_imported_in_isolation(tmp_path):
    """Smoke test: the generated ``_vendored/`` imports without registry_root.

    A separate interpreter is started with ``_vendored/`` as the only extra
    ``sys.path`` entry; it imports ``extract_urls`` and counts the URLs in a
    short sample string.
    """
    # Local import: the module-level imports of this file do not include sys.
    import sys

    enr = _make_enricher_dir(
        tmp_path,
        "id: x\nname: x\napplies_to: [Webpage]\n"
        "uses_functions:\n - extract_urls_py_cybersecurity\n",
    )
    proc = _run_vendor(enr)
    assert proc.returncode == 0, proc.stderr

    # Pass the absolute path, so neither a symlink nor a specific working
    # directory is needed for the child to find the vendored package.
    vendored = str(enr / "_vendored")
    code = (
        f"import sys; sys.path.insert(0, {vendored!r});"
        "from cybersecurity.cybersecurity import extract_urls;"
        "print(len(extract_urls('foo http://x.com bar')))"
    )

    # Use the interpreter running the tests rather than whatever `python3` is
    # on PATH, so the child sees the same installed third-party packages.
    proc2 = subprocess.run(
        [sys.executable, "-c", code],
        cwd=str(tmp_path),
        capture_output=True,
        text=True,
        timeout=10,
    )
    assert proc2.returncode == 0, proc2.stderr
    assert proc2.stdout.strip() == "1"
|
||||||
Executable
+187
@@ -0,0 +1,187 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# vendor_enricher_python.sh — copia las funciones Python del registry
|
||||||
|
# que un enricher declara en `uses_functions` a su directorio
|
||||||
|
# `_vendored/`. El run.py importa de `_vendored/` en lugar de
|
||||||
|
# `<registry_root>/python/functions/`, lo que hace al binario
|
||||||
|
# distribuible sin acceso al fn_registry.
|
||||||
|
#
|
||||||
|
# Issue 0033b.
|
||||||
|
#
|
||||||
|
# Uso:
|
||||||
|
# tools/vendor_enricher_python.sh <enricher_dir> [<registry_root>]
|
||||||
|
#
|
||||||
|
# Lee `uses_functions` del manifest YAML, filtra IDs `*_py_*`,
|
||||||
|
# resuelve `file_path` desde registry.db, copia los .py y todas las
|
||||||
|
# importaciones siblings dentro del mismo dominio (transitivo).
|
||||||
|
#
|
||||||
|
# Genera `.vendor.lock` con `<id> <sha256> <src_path>` para
|
||||||
|
# auditoria. Idempotente — si los hashes coinciden, no copia.
|
||||||
|
#
|
||||||
|
# Salida:
|
||||||
|
# <enricher_dir>/_vendored/__init__.py
|
||||||
|
# <enricher_dir>/_vendored/<domain>/__init__.py
|
||||||
|
# <enricher_dir>/_vendored/<domain>/<filename>.py
|
||||||
|
# <enricher_dir>/.vendor.lock
|
||||||
|
|
||||||
|
# Strict mode: abort on command failure, unset variables and failed pipeline
# stages.
set -euo pipefail

# The enricher directory is mandatory. The registry root comes from the second
# argument, then the REGISTRY_ROOT environment variable, then the current
# working directory.
ENR_DIR="${1:?enricher_dir requerido}"
REGISTRY_ROOT="${2:-${REGISTRY_ROOT:-$(pwd)}}"

# Both inputs must exist before any work starts. Each failure has its own exit
# code: 1 for a missing manifest, 2 for a missing registry database.
[[ -f "$ENR_DIR/manifest.yaml" ]] || {
  echo "ERROR: $ENR_DIR/manifest.yaml no existe" >&2
  exit 1
}
[[ -f "$REGISTRY_ROOT/registry.db" ]] || {
  echo "ERROR: $REGISTRY_ROOT/registry.db no existe (REGISTRY_ROOT incorrecto)" >&2
  exit 2
}

# Output locations, both inside the enricher directory.
VENDOR="$ENR_DIR/_vendored"
LOCK="$ENR_DIR/.vendor.lock"
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
# Read `uses_functions` from the manifest (the YAML subset supported by the
# C++ indexer). Accepts either the inline form `[a, b]` or an indented list
# with one `- item` per line.
# ----------------------------------------------------------------------------

#######################################
# Print the Python function IDs declared in a manifest, one per line.
# Only IDs containing `_py_` are kept. Single and double quotes around an ID
# are stripped in BOTH the inline and the list form; in the list form a
# trailing `# comment` is stripped as well.
# Arguments: $1 - path to the manifest.yaml file
# Outputs:   matching IDs on stdout (possibly none)
# Returns:   0 always; a manifest without Python IDs is not an error
#######################################
extract_python_function_ids() {
  local manifest="$1"
  # `|| true`: grep exits 1 when nothing matches, which must not abort the
  # caller under `set -e -o pipefail`.
  awk '
    # Inline form: uses_functions: [a, "b", c]
    /^uses_functions:[[:space:]]*\[/ {
      line = $0
      sub(/^uses_functions:[[:space:]]*\[/, "", line)
      sub(/\].*$/, "", line)
      # \047 is the single quote; strip both quote kinds and the commas.
      gsub(/[\047",]/, " ", line)
      print line
      exit
    }
    # List form: the key alone on its line, items on the following lines.
    /^uses_functions:[[:space:]]*$/ { collecting = 1; next }
    collecting && /^[[:space:]]*-[[:space:]]+/ {
      sub(/^[[:space:]]*-[[:space:]]+/, "")
      sub(/[[:space:]]*#.*$/, "")
      gsub(/[\047"]/, "")
      print
      next
    }
    # Any line starting at column 0 that is not a list item ends the list.
    collecting && /^[^[:space:]-]/ { collecting = 0 }
  ' "$manifest" | tr ' ' '\n' | awk 'NF' | grep '_py_' || true
}

ids=$(extract_python_function_ids "$ENR_DIR/manifest.yaml")
|
||||||
|
|
||||||
|
# Nothing to vendor: remove any `_vendored/` tree and lock file left over from
# a previous manifest, report it and finish successfully.
[[ -n "$ids" ]] || {
  rm -rf "$VENDOR" "$LOCK"
  echo "vendor: $ENR_DIR — sin uses_functions Python"
  exit 0
}
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
# Resolve every declared ID to its `file_path` in registry.db and build the
# work queue. Each queue entry is "<id>|<absolute source path>". IDs that
# cannot be resolved are reported on stderr and skipped.
# ----------------------------------------------------------------------------
declare -A SEEN=()   # absolute paths already copied (dedup in copy_with_siblings)

queue=()
while IFS= read -r id; do
  [[ -n "$id" ]] || continue

  # The ID is interpolated into the SQL text below, so accept only plain
  # identifier characters. A quote or semicolon coming from the manifest must
  # never reach sqlite3.
  if [[ ! "$id" =~ ^[A-Za-z0-9_]+$ ]]; then
    echo "WARN: id '$id' contiene caracteres no validos (skip)" >&2
    continue
  fi

  fp=$(sqlite3 "$REGISTRY_ROOT/registry.db" \
    "SELECT file_path FROM functions WHERE id='$id';")
  if [[ -z "$fp" ]]; then
    echo "WARN: $id no esta en registry.db (skip)" >&2
    continue
  fi

  # file_path is stored relative to the registry root.
  abs="$REGISTRY_ROOT/$fp"
  if [[ ! -f "$abs" ]]; then
    echo "WARN: file_path '$fp' no existe (skip $id)" >&2
    continue
  fi

  queue+=("$id|$abs")
done <<< "$ids"

# No declared function could be resolved: clean up stale output and stop.
if [[ ${#queue[@]} -eq 0 ]]; then
  rm -rf "$VENDOR" "$LOCK"
  echo "vendor: $ENR_DIR — ninguna funcion Python resoluble"
  exit 0
fi
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
# Idempotency: compare the "<id>|<sha256>" pairs of the declared functions
# with those recorded in the existing lock. If they match AND the vendored
# tree is still present, exit without touching anything.
#
# Only the declared functions are hashed here; an edit to a transitively
# copied sibling module does not by itself invalidate the lock.
# ----------------------------------------------------------------------------
state_hash=""
for entry in "${queue[@]}"; do
  id="${entry%%|*}"
  abs="${entry#*|}"
  sha=$(sha256sum "$abs" | cut -d' ' -f1)
  state_hash+="$id|$sha"$'\n'
done

# A matching lock is only trusted when `_vendored/` exists. Otherwise a
# deleted tree would be reported as up to date and never regenerated.
if [[ -f "$LOCK" && -d "$VENDOR" ]]; then
  cur_state=""
  # Lock line format: "<id> <sha256> <path>"; only the first two fields count.
  while read -r eid esha _; do
    cur_state+="$eid|$esha"$'\n'
  done < "$LOCK"
  if [[ "$cur_state" == "$state_hash" ]]; then
    echo "vendor: $ENR_DIR — sin cambios (.vendor.lock OK)"
    exit 0
  fi
fi
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
# Rebuild `_vendored/` from scratch. Files are then copied with transitive
# expansion of sibling imports inside the same domain: if a vendored .py has
# `from X import Y` or `import X` where X is a module in the same directory,
# X.py is copied too (only if it exists next to the original source).
# ----------------------------------------------------------------------------
rm -rf "$VENDOR"
mkdir -p "$VENDOR"
# The directory was just recreated, so this creates the empty package marker.
: > "$VENDOR/__init__.py"
|
||||||
|
|
||||||
|
#######################################
# Copy one registry source file into `_vendored/<domain>/` and, recursively,
# every sibling module (same source directory) that it imports.
# Globals:
#   REGISTRY_ROOT (read)  - root that source paths are made relative to
#   VENDOR        (read)  - destination `_vendored/` directory
#   SEEN          (read/written) - associative array of paths already copied
# Arguments:
#   $1 - absolute path of the .py file to copy
# Returns:
#   0 on success, including when the file was already copied or imports
#   nothing; a failing realpath/mkdir/cp aborts under `set -e`.
#######################################
copy_with_siblings() {
  local abs="$1"
  if [[ -n "${SEEN[$abs]:-}" ]]; then
    return 0
  fi
  SEEN[$abs]=1

  # Domain = name of the directory holding the file, taken from the path
  # relative to the registry root: <root>/python/functions/<domain>/<file>.py
  local rel parent domain fname dst_dir
  rel=$(realpath --relative-to="$REGISTRY_ROOT" "$abs")
  parent="${rel%/*}"
  domain="${parent##*/}"
  fname="${abs##*/}"
  dst_dir="$VENDOR/$domain"
  mkdir -p "$dst_dir"
  touch "$dst_dir/__init__.py"
  cp "$abs" "$dst_dir/$fname"

  # Collect candidate module names from `from <name> import`,
  # `from .<name> import` and `import <name>` lines. sed -n with the `p` flag
  # is used instead of grep because grep exits 1 when nothing matches, which
  # under `set -e -o pipefail` would abort the script for any file that has
  # no such import lines.
  local src_dir sibling_names
  src_dir=$(dirname "$abs")
  sibling_names=$(
    sed -nE \
      -e 's/^[[:space:]]*from[[:space:]]+\.?([A-Za-z_][A-Za-z0-9_]*)[[:space:]]+import.*/\1/p' \
      -e 's/^[[:space:]]*import[[:space:]]+([A-Za-z_][A-Za-z0-9_]*).*/\1/p' \
      "$abs" | sort -u
  )

  # A name is a sibling only if <name>.py exists next to the source file.
  local name sib
  while IFS= read -r name; do
    [[ -n "$name" ]] || continue
    sib="$src_dir/$name.py"
    if [[ -f "$sib" && "$sib" != "$abs" ]]; then
      copy_with_siblings "$sib"
    fi
  done <<< "$sibling_names"
  return 0
}
|
||||||
|
|
||||||
|
# Build the lock in a temporary file next to its final location and rename it
# into place only after every declared function has been copied.
: > "$LOCK.tmp"
for entry in "${queue[@]}"; do
  id="${entry%%|*}"
  abs="${entry#*|}"
  copy_with_siblings "$abs"
  sha=$(sha256sum "$abs" | cut -d' ' -f1)
  rel=$(realpath --relative-to="$REGISTRY_ROOT" "$abs")
  # One line per declared function: "<id> <sha256> <path relative to root>".
  printf '%s %s %s\n' "$id" "$sha" "$rel" >> "$LOCK.tmp"
done
mv "$LOCK.tmp" "$LOCK"

# Summary: declared functions (lock lines) versus .py files under _vendored/.
n=$(wc -l < "$LOCK")
copied=$(find "$VENDOR" -name '*.py' | wc -l)
echo "vendor: $ENR_DIR — $n funcs declaradas, $copied archivos copiados"
|
||||||
Reference in New Issue
Block a user