#!/usr/bin/env bash
# vendor_enricher_python.sh — copies the registry Python functions that an
# enricher declares under `uses_functions` into its `_vendored/` directory.
# Per the original design notes, run.py imports from `_vendored/` instead of
# `<registry_root>/python/functions/`, which makes the binary distributable
# without access to the fn_registry.
#
# Issue 0033b.
#
# Usage:
#   tools/vendor_enricher_python.sh <enricher_dir> [<registry_root>]
#
#   <enricher_dir>   directory containing manifest.yaml (required).
#   <registry_root>  directory containing registry.db. Defaults to the
#                    REGISTRY_ROOT environment variable, then to $(pwd).
#
# Reads `uses_functions` from the manifest YAML, keeps the IDs containing
# `_py_`, resolves each `file_path` from registry.db, and copies those .py
# files plus every sibling module they import from the same source directory
# (transitively).
#
# Writes `.vendor.lock` with one `<id> <sha256> <relative_path>` line per
# declared function, for auditing. Idempotent: when the lock matches the
# current hashes AND every vendored copy is byte-identical to its source,
# nothing is touched.
#
# Output:
#   <enricher_dir>/_vendored/__init__.py
#   <enricher_dir>/_vendored/<domain>/__init__.py
#   <enricher_dir>/_vendored/<domain>/<module>.py
#   <enricher_dir>/.vendor.lock
#
# Exit status:
#   0  success (including "nothing to vendor" and "no changes")
#   1  manifest.yaml missing (also used by the shell when $1 is missing)
#   2  registry.db missing
#   other non-zero: a required tool (awk, sqlite3, sha256sum, realpath, ...)
#   failed; `set -e` aborts the run.

set -euo pipefail

ENR_DIR="${1:?enricher_dir requerido}"
REGISTRY_ROOT="${2:-${REGISTRY_ROOT:-$(pwd)}}"

if [[ ! -f "$ENR_DIR/manifest.yaml" ]]; then
  echo "ERROR: $ENR_DIR/manifest.yaml no existe" >&2
  exit 1
fi
if [[ ! -f "$REGISTRY_ROOT/registry.db" ]]; then
  echo "ERROR: $REGISTRY_ROOT/registry.db no existe (REGISTRY_ROOT incorrecto)" >&2
  exit 2
fi

VENDOR="$ENR_DIR/_vendored"
LOCK="$ENR_DIR/.vendor.lock"
LOCK_TMP="$LOCK.tmp"

# Never leave a half-written lock behind, whatever the exit path.
trap 'rm -f -- "$LOCK_TMP"' EXIT

# ----------------------------------------------------------------------------
# Helpers
# ----------------------------------------------------------------------------

# Prints, one per line and de-duplicated, the top-level module names that the
# Python file $1 imports via `from X import ...`, `from .X import ...` or
# `import X`. Line-based regex scan, not a Python parser: only the first
# module of `import a, b` is seen, and names inside strings/comments that
# look like imports are reported too (the caller filters by file existence).
sibling_imports() {
  # grep exits 1 when nothing matches; under `set -e -o pipefail` that would
  # abort the whole run for any file without imports. Accept 1, keep real
  # errors (exit >= 2) fatal.
  { grep -E '^[[:space:]]*(from \.?[a-zA-Z_][a-zA-Z0-9_]* import|import [a-zA-Z_][a-zA-Z0-9_]*)' -- "$1" \
      || [[ $? -eq 1 ]]; } \
    | sed -E 's/^[[:space:]]*from \.?([a-zA-Z_][a-zA-Z0-9_]*).*/\1/; s/^[[:space:]]*import ([a-zA-Z_][a-zA-Z0-9_]*).*/\1/' \
    | sort -u
}

# Prints the destination path inside $VENDOR for source file $1.
# The domain is the name of the file's parent directory, taken from its path
# relative to REGISTRY_ROOT (<registry_root>/python/functions/<domain>/<f>.py).
vendored_path() {
  local abs=$1
  local rel parent
  rel=$(realpath --relative-to="$REGISTRY_ROOT" -- "$abs")
  parent=${rel%/*}
  printf '%s\n' "$VENDOR/${parent##*/}/${abs##*/}"
}

declare -A SEEN=()   # absolute source paths already collected (dedup)
SOURCES=()           # ordered transitive closure of files to vendor

# Adds $1 and, recursively, every imported module that exists as <name>.py
# next to it, to SOURCES. SEEN breaks import cycles.
collect_with_siblings() {
  local abs=$1
  if [[ -n "${SEEN[$abs]:-}" ]]; then
    return 0
  fi
  SEEN[$abs]=1
  SOURCES+=("$abs")

  local src_dir names name sib
  src_dir=$(dirname -- "$abs")
  names=$(sibling_imports "$abs")
  # Word splitting is intended: names are plain Python identifiers.
  for name in $names; do
    sib="$src_dir/$name.py"
    if [[ -f "$sib" && "$sib" != "$abs" ]]; then
      collect_with_siblings "$sib"
    fi
  done
}

# Succeeds only when $VENDOR exists and every file in SOURCES has a
# byte-identical vendored copy. Catches a deleted `_vendored/` and changes in
# transitive siblings, neither of which is visible in the lock file.
vendor_is_current() {
  local src
  [[ -d "$VENDOR" ]] || return 1
  for src in "${SOURCES[@]}"; do
    cmp -s -- "$src" "$(vendored_path "$src")" || return 1
  done
  return 0
}

# ----------------------------------------------------------------------------
# Read uses_functions from the manifest (per the original notes, the YAML
# subset supported by the C++ indexer). Accepts the inline form `[a, b]` or a
# list on following lines introduced with `- `. Both single and double quotes
# around IDs are stripped in both forms.
# ----------------------------------------------------------------------------
ids_raw=$(
  awk '
    /^uses_functions:[[:space:]]*\[/ {
      line = $0
      sub(/^uses_functions:[[:space:]]*\[/, "", line)
      sub(/\].*$/, "", line)
      gsub(/[\047",]/, " ", line)
      print line
      exit
    }
    /^uses_functions:[[:space:]]*$/ { collecting = 1; next }
    collecting && /^[[:space:]]*-[[:space:]]+/ {
      sub(/^[[:space:]]*-[[:space:]]+/, "")
      sub(/[[:space:]]*#.*$/, "")
      gsub(/[\047"]/, "")
      print
      next
    }
    collecting && /^[^[:space:]-]/ { collecting = 0 }
  ' "$ENR_DIR/manifest.yaml" \
    | tr -s ' \t' '\n' \
    | awk 'NF' \
    | { grep '_py_' || [[ $? -eq 1 ]]; }
)

ids=()
if [[ -n "$ids_raw" ]]; then
  mapfile -t ids <<<"$ids_raw"
fi

if (( ${#ids[@]} == 0 )); then
  # Nothing to vendor — remove vendor dir and lock in case they were left
  # over from a previous manifest.
  rm -rf -- "$VENDOR" "$LOCK"
  echo "vendor: $ENR_DIR — sin uses_functions Python"
  exit 0
fi

# ----------------------------------------------------------------------------
# Resolve each ID to its file_path in registry.db. q_ids[i] / q_paths[i] are
# the declared functions that exist on disk.
# ----------------------------------------------------------------------------
q_ids=()
q_paths=()
sq="'"
for id in "${ids[@]}"; do
  # Double any single quote so the ID is always a well-formed SQL literal.
  sql_id=${id//$sq/$sq$sq}
  fp=$(sqlite3 "$REGISTRY_ROOT/registry.db" \
    "SELECT file_path FROM functions WHERE id='$sql_id';")
  if [[ -z "$fp" ]]; then
    echo "WARN: $id no esta en registry.db (skip)" >&2
    continue
  fi
  abs="$REGISTRY_ROOT/$fp"
  if [[ ! -f "$abs" ]]; then
    echo "WARN: file_path '$fp' no existe (skip $id)" >&2
    continue
  fi
  q_ids+=("$id")
  q_paths+=("$abs")
done

if (( ${#q_ids[@]} == 0 )); then
  rm -rf -- "$VENDOR" "$LOCK"
  echo "vendor: $ENR_DIR — ninguna funcion Python resoluble"
  exit 0
fi

# ----------------------------------------------------------------------------
# Compute the hashes of the declared files and the full set of files to copy
# (declared files + transitive sibling imports).
# ----------------------------------------------------------------------------
shas=()
state=""
for i in "${!q_ids[@]}"; do
  sha=$(sha256sum -- "${q_paths[i]}" | cut -d' ' -f1)
  shas+=("$sha")
  state+="${q_ids[i]}|$sha"$'\n'
  collect_with_siblings "${q_paths[i]}"
done

# ----------------------------------------------------------------------------
# Idempotence: exit without touching anything when the existing lock lists
# the same (id, sha256) pairs in the same order and the vendored tree still
# mirrors the sources.
# ----------------------------------------------------------------------------
if [[ -f "$LOCK" ]]; then
  cur_state=""
  # Lock line format: "<id> <sha256> <relative_path>"
  while read -r eid esha _ || [[ -n "$eid" ]]; do
    cur_state+="$eid|$esha"$'\n'
  done < "$LOCK"
  if [[ "$cur_state" == "$state" ]] && vendor_is_current; then
    echo "vendor: $ENR_DIR — sin cambios (.vendor.lock OK)"
    exit 0
  fi
fi

# ----------------------------------------------------------------------------
# Rebuild the vendor tree from scratch. The old lock is dropped first so an
# interrupted run can never be mistaken for an up-to-date one.
# ----------------------------------------------------------------------------
rm -rf -- "$VENDOR"
rm -f -- "$LOCK"
mkdir -p -- "$VENDOR"
touch -- "$VENDOR/__init__.py"

for src in "${SOURCES[@]}"; do
  dst=$(vendored_path "$src")
  dst_dir=${dst%/*}
  mkdir -p -- "$dst_dir"
  touch -- "$dst_dir/__init__.py"
  cp -- "$src" "$dst"
done

# Write the lock to a temp file and rename it into place at the end.
for i in "${!q_ids[@]}"; do
  rel=$(realpath --relative-to="$REGISTRY_ROOT" -- "${q_paths[i]}")
  printf '%s %s %s\n' "${q_ids[i]}" "${shas[i]}" "$rel"
done > "$LOCK_TMP"
mv -- "$LOCK_TMP" "$LOCK"

n=$(wc -l < "$LOCK")
echo "vendor: $ENR_DIR — $n funcs declaradas, $(find "$VENDOR" -name '*.py' | wc -l) archivos copiados"