chore: auto-commit (95 archivos)
- cmd/fn/doctor.go - cmd/fn/main.go - cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt - cpp/apps/primitives_gallery/playground/tables/data_table.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.h - cpp/apps/primitives_gallery/playground/tables/self_test.cpp - cpp/apps/primitives_gallery/playground/tables/tql.cpp - cpp/apps/primitives_gallery/playground/tables/viz.cpp - cpp/apps/primitives_gallery/playground/tables/viz.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,73 @@
|
||||
---
|
||||
name: cuda_toolkit_check
|
||||
kind: function
|
||||
lang: bash
|
||||
domain: infra
|
||||
version: "1.0.0"
|
||||
purity: impure
|
||||
signature: "cuda_toolkit_check() -> void"
|
||||
description: "Detecta componentes CUDA instalados en el sistema y emite pares key=value a stdout: nvcc (version o missing), nvidia_smi (present/missing), driver_version, cuda_libs (path o missing) y overall (ok|partial|missing). Exit code 0 siempre — funcion informativa, no fatal."
|
||||
tags: [cuda, nvidia, gpu, hardware, probe, infra, toolkit]
|
||||
uses_functions: []
|
||||
uses_types: []
|
||||
returns: []
|
||||
returns_optional: false
|
||||
error_type: "error_go_core"
|
||||
imports: []
|
||||
params:
|
||||
- name: (ninguno)
|
||||
desc: "No toma parametros. Lee el estado del sistema via nvcc, nvidia-smi y busqueda en rutas canonicas de CUDA."
|
||||
output: "Cinco pares key=value en stdout: nvcc, nvidia_smi, driver_version, cuda_libs, overall. overall=ok si los tres componentes contados (nvcc, nvidia_smi y cuda_libs) estan presentes; partial si solo algunos; missing si ninguno. driver_version es informativo y no cuenta para overall."
|
||||
tested: false
|
||||
tests: []
|
||||
test_file_path: ""
|
||||
file_path: "bash/functions/infra/cuda_toolkit_check.sh"
|
||||
---
|
||||
|
||||
## Ejemplo
|
||||
|
||||
```bash
|
||||
source bash/functions/infra/cuda_toolkit_check.sh
|
||||
cuda_toolkit_check
|
||||
```
|
||||
|
||||
Salida en maquina con CUDA completo:
|
||||
```
|
||||
nvcc=12.4
|
||||
nvidia_smi=present
|
||||
driver_version=550.54.15
|
||||
cuda_libs=/usr/local/cuda
|
||||
overall=ok
|
||||
```
|
||||
|
||||
Salida en maquina sin CUDA:
|
||||
```
|
||||
nvcc=missing
|
||||
nvidia_smi=missing
|
||||
driver_version=missing
|
||||
cuda_libs=missing
|
||||
overall=missing
|
||||
```
|
||||
|
||||
Invocar directamente:
|
||||
```bash
|
||||
bash bash/functions/infra/cuda_toolkit_check.sh
|
||||
```
|
||||
|
||||
Parsear desde otro script:
|
||||
```bash
|
||||
eval "$(cuda_toolkit_check)"
|
||||
echo "CUDA overall: $overall"
|
||||
if [[ "$overall" == "ok" ]]; then
|
||||
echo "CUDA completo: nvcc=$nvcc driver=$driver_version libs=$cuda_libs"
|
||||
fi
|
||||
```
|
||||
|
||||
## Notas
|
||||
|
||||
- Idempotente: no instala, no modifica nada, solo consulta.
|
||||
- Exit code 0 siempre — ausencia de CUDA es informacion, no fallo.
|
||||
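- Busca una instalacion CUDA comprobando la existencia de `/usr/local/cuda*` y `/opt/cuda*`; si no hay, busca `libcuda.so*` en `/usr/lib/x86_64-linux-gnu` y `/usr/lib/aarch64-linux-gnu`, y por ultimo via `ldconfig -p`.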
|
||||
- `driver_version` refleja el driver NVIDIA del kernel, reportado por nvidia-smi.
|
||||
- `nvcc` reporta la version del compilador CUDA toolkit (puede diferir de la version soportada por el driver).
|
||||
- Para obtener la version CUDA maxima soportada por el driver, usar `get_gpu_info_go_infra` (campo CudaVersion del struct GpuInfo).
|
||||
@@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env bash
|
||||
# cuda_toolkit_check — Detecta componentes CUDA instalados en el sistema.
|
||||
#
|
||||
# Emite pares key=value a stdout:
|
||||
# nvcc=<version|missing>
|
||||
# nvidia_smi=<present|missing>
|
||||
# driver_version=<version|missing>
|
||||
# cuda_libs=<path|missing>
|
||||
# overall=<ok|partial|missing>
|
||||
#
|
||||
# Exit code 0 siempre (funcion informativa, no fatal).
|
||||
# Idempotente: se puede invocar multiples veces sin efectos secundarios.
|
||||
|
||||
#######################################
# Probe the host for CUDA components and report them as key=value lines.
#
# Purely informational: nothing is installed or modified, and the function
# returns 0 even when no CUDA component is found. It is also safe to call from
# a script running under `set -e` / `set -u` / `set -o pipefail`.
#
# Arguments:
#   None (any arguments are ignored).
# Outputs (stdout, always exactly these five lines, in this order):
#   nvcc=<X.Y|present|missing>       toolkit release parsed from `nvcc --version`;
#                                    "present" if nvcc exists but no release
#                                    number could be parsed
#   nvidia_smi=<present|missing>     whether nvidia-smi is on PATH
#   driver_version=<version|missing> first GPU's driver version from nvidia-smi;
#                                    "missing" if the query fails or the answer
#                                    does not look like a dotted version
#   cuda_libs=<path|missing>         toolkit directory, or the directory that
#                                    holds libcuda.so
#   overall=<ok|partial|missing>     ok = nvcc, nvidia_smi and cuda_libs all
#                                    found; missing = none; partial otherwise
# Returns:
#   0 always.
#######################################
cuda_toolkit_check() {
  local nvcc_ver="missing"
  local nvidia_smi_status="missing"
  local driver_version="missing"
  local cuda_libs_path="missing"

  # --- nvcc ---
  if command -v nvcc &>/dev/null; then
    # `nvcc --version` prints a line such as:
    #   Cuda compilation tools, release 12.4, V12.4.131
    local raw
    # `|| true`: a failing nvcc must not abort a caller running under `set -e`.
    raw="$(nvcc --version 2>&1)" || true
    # Bash regex instead of `grep -P`: no dependency on a PCRE-enabled grep,
    # and only the first "release X.Y" is taken.
    local release_re='release ([0-9]+\.[0-9]+)'
    if [[ "$raw" =~ $release_re ]]; then
      nvcc_ver="${BASH_REMATCH[1]}"
    else
      nvcc_ver="present"
    fi
  fi

  # --- nvidia-smi + driver_version ---
  if command -v nvidia-smi &>/dev/null; then
    nvidia_smi_status="present"
    local drv
    # Check nvidia-smi's own exit status: when it cannot reach the driver it
    # prints its error text on stdout, which must not be reported as a version.
    if drv="$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null)"; then
      drv="${drv%%$'\n'*}"        # one line per GPU; keep the first
      drv="${drv//[[:space:]]/}"  # drop padding / stray CR
      # Accept only dotted numeric versions so that the output stays safe for
      # callers that `eval` it.
      local version_re='^[0-9]+(\.[0-9]+)*$'
      if [[ "$drv" =~ $version_re ]]; then
        driver_version="$drv"
      fi
    fi
  fi

  # --- cuda_libs: canonical toolkit roots first, then the driver library ---
  local -a candidates=(
    /usr/local/cuda
    /usr/local/cuda-*
    /opt/cuda
    /opt/cuda-*
    /usr/lib/x86_64-linux-gnu/libcuda.so*
    /usr/lib/aarch64-linux-gnu/libcuda.so*
  )
  local candidate
  for candidate in "${candidates[@]}"; do
    # An unmatched glob stays as the literal pattern, which does not exist.
    [[ -e "$candidate" ]] || continue
    if [[ -d "$candidate" ]]; then
      cuda_libs_path="$candidate"
    else
      # A libcuda.so* file: report the directory that contains it, the same
      # shape the ldconfig fallback below produces.
      cuda_libs_path="${candidate%/*}"
    fi
    break
  done

  # Fallback: ask the dynamic linker cache where libcuda.so lives.
  if [[ "$cuda_libs_path" == "missing" ]]; then
    local libcuda
    # `|| true`: ldconfig may be absent or the pipeline may fail under the
    # caller's pipefail; neither is an error for this probe.
    libcuda="$(ldconfig -p 2>/dev/null | awk '/libcuda\.so/ { print $NF; exit }')" || true
    if [[ -n "$libcuda" ]]; then
      cuda_libs_path="$(dirname -- "$libcuda")"
    fi
  fi

  # --- overall ---
  # Plain assignment rather than ((found_count++)): the post-increment returns
  # status 1 when the counter is 0, which kills callers running under `set -e`.
  local found_count=0
  [[ "$nvcc_ver" == "missing" ]] || found_count=$((found_count + 1))
  [[ "$nvidia_smi_status" == "missing" ]] || found_count=$((found_count + 1))
  [[ "$cuda_libs_path" == "missing" ]] || found_count=$((found_count + 1))

  local overall
  case "$found_count" in
    0) overall="missing" ;;
    3) overall="ok" ;;
    *) overall="partial" ;;
  esac

  # --- emit results ---
  printf '%s\n' \
    "nvcc=${nvcc_ver}" \
    "nvidia_smi=${nvidia_smi_status}" \
    "driver_version=${driver_version}" \
    "cuda_libs=${cuda_libs_path}" \
    "overall=${overall}"
  return 0
}
|
||||
|
||||
# Run the probe only when this file is executed directly; when it is sourced
# (BASH_SOURCE[0] differs from $0) it only defines cuda_toolkit_check.
[[ "${BASH_SOURCE[0]}" != "${0}" ]] || cuda_toolkit_check "$@"
|
||||
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tests para cuda_toolkit_check
|
||||
# Smoke: verifica que stdout contiene todas las keys requeridas y exit code 0.
|
||||
set -uo pipefail
|
||||
# Nota: set -e NO se usa para que los asserts fallen de forma acumulativa
|
||||
# en lugar de abortar el script al primer fallo.
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "$SCRIPT_DIR/../cuda_toolkit_check.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# Compare two strings for exact equality and record the outcome.
# Arguments: $1 - test name, $2 - expected value, $3 - actual value
# Globals:   increments PASS on a match, FAIL otherwise
# Outputs:   one "PASS: ..." or "FAIL: ..." line on stdout
# Returns:   0 always, so a failed assertion never stops the run
assert_eq() {
  local label="$1" want="$2" have="$3"
  if [[ "$want" != "$have" ]]; then
    printf '%s\n' "FAIL: $label — expected '$want', got '$have'"
    FAIL=$((FAIL + 1))
    return 0
  fi
  printf '%s\n' "PASS: $label"
  PASS=$((PASS + 1))
}
|
||||
|
||||
# Check that a literal substring occurs in a string and record the outcome.
# Arguments: $1 - test name, $2 - needle (taken literally), $3 - haystack
# Globals:   increments PASS when found, FAIL otherwise
# Outputs:   one "PASS: ..." or "FAIL: ..." line on stdout
# Returns:   0 always, so a failed assertion never stops the run
assert_contains() {
  local test_name="$1" needle="$2" haystack="$3"
  # Built-in substring match instead of `echo | grep -qF`: a needle that starts
  # with "-" is not parsed as a grep option, and there is no pipeline that
  # could fail spuriously under `set -o pipefail` when grep -q exits early.
  # The quoted needle inside the pattern is matched literally.
  if [[ "$haystack" == *"$needle"* ]]; then
    echo "PASS: $test_name"
    PASS=$((PASS + 1))
  else
    echo "FAIL: $test_name — '$needle' not found in output"
    FAIL=$((FAIL + 1))
  fi
}
|
||||
|
||||
# Check that a value matches an extended regular expression (grep -E) and
# record the outcome. As with grep, the match is per line: a multi-line value
# passes if any of its lines matches.
# Arguments: $1 - test name, $2 - ERE pattern, $3 - value to test
# Globals:   increments PASS on a match, FAIL otherwise
# Outputs:   one "PASS: ..." or "FAIL: ..." line on stdout
# Returns:   0 always, so a failed assertion never stops the run
assert_matches_pattern() {
  local test_name="$1" pattern="$2" value="$3"
  # Here-string instead of `echo "$value" |`: echo would swallow values such as
  # "-n" or "-e", and without a pipeline `set -o pipefail` cannot turn an early
  # grep -q exit into a failure. `--` keeps a pattern that starts with "-" from
  # being parsed as an option.
  if grep -qE -- "$pattern" <<<"$value"; then
    echo "PASS: $test_name"
    PASS=$((PASS + 1))
  else
    echo "FAIL: $test_name — '$value' does not match pattern '$pattern'"
    FAIL=$((FAIL + 1))
  fi
}
|
||||
|
||||
# Check that a value is not the empty string and record the outcome.
# Arguments: $1 - test name, $2 - value to test
# Globals:   increments PASS when non-empty, FAIL when empty
# Outputs:   one "PASS: ..." or "FAIL: ..." line on stdout
# Returns:   0 always, so a failed assertion never stops the run
assert_nonempty() {
  local label="$1" candidate="$2"
  if [[ -z "$candidate" ]]; then
    printf '%s\n' "FAIL: $label — valor vacio"
    FAIL=$((FAIL + 1))
  else
    printf '%s\n' "PASS: $label"
    PASS=$((PASS + 1))
  fi
}
|
||||
|
||||
# Print the value emitted for key $1 in the captured key=value report $2.
report_value() {
  echo "$2" | grep "^$1=" | cut -d= -f2
}

# --- Capture the report and its exit status once; the checks below reuse them ---
OUTPUT="$(cuda_toolkit_check)"
EXIT_CODE=$?

# --- The probe is informational, so it must exit 0 ---
assert_eq "exit code es 0" "0" "$EXIT_CODE"

# --- Every documented key must be present in stdout ---
for key in nvcc nvidia_smi driver_version cuda_libs overall; do
  assert_contains "stdout contiene clave ${key}=" "${key}=" "$OUTPUT"
done

# --- overall is one of ok|partial|missing ---
assert_matches_pattern "overall tiene valor valido ok|partial|missing" \
  "^(ok|partial|missing)$" "$(report_value overall "$OUTPUT")"

# --- nvcc carries some value ---
assert_nonempty "nvcc tiene valor no vacio" "$(report_value nvcc "$OUTPUT")"

# --- nvidia_smi is one of present|missing ---
assert_matches_pattern "nvidia_smi tiene valor valido present|missing" \
  "^(present|missing)$" "$(report_value nvidia_smi "$OUTPUT")"

# --- driver_version carries some value ---
assert_nonempty "driver_version tiene valor no vacio" \
  "$(report_value driver_version "$OUTPUT")"

# --- cuda_libs carries some value ---
assert_nonempty "cuda_libs tiene valor no vacio" \
  "$(report_value cuda_libs "$OUTPUT")"

# --- The report is exactly five lines long ---
LINE_COUNT="$(echo "$OUTPUT" | wc -l | tr -d ' ')"
assert_eq "salida tiene exactamente 5 lineas" "5" "$LINE_COUNT"

# --- A second call yields the same report ---
OUTPUT2="$(cuda_toolkit_check)"
assert_eq "segunda invocacion produce mismo resultado (idempotente)" "$OUTPUT" "$OUTPUT2"

# --- Summary: fail the script if any assertion failed ---
printf '%s\n' "---"
printf '%s\n' "Results: $PASS passed, $FAIL failed"
if [[ $FAIL -ne 0 ]]; then
  exit 1
fi
|
||||
Reference in New Issue
Block a user