feat: implement server-wide management actions and enhance TUI dashboard
This commit is contained in:
+57
-2
@@ -50,9 +50,64 @@ read_pid() {
|
||||
[[ -f "$f" ]] && cat "$f" || echo 0
|
||||
}
|
||||
|
||||
# Map an agent ID to its config path by scanning agents/*/config.yaml
# (relative to the current working directory).
# Arguments: $1 - agent ID to look up
# Outputs:   path of the first config whose ' id:' line carries that ID;
#            prints nothing when no config matches
# Returns:   0 in both cases — callers test for empty output
config_path_for() {
  local target_id="$1"
  # Both are local so a caller's own `cfg`/`id` is never clobbered.
  local cfg id
  for cfg in agents/*/config.yaml; do
    # An unmatched glob stays literal; skip anything that is not a file.
    [[ -f "$cfg" ]] || continue
    # Second field of the first ' id:' line. One awk call instead of
    # grep|awk, so a config without an id line cannot fail a pipeline.
    id="$(awk '/^ id:/ { print $2; exit }' "$cfg")"
    if [[ "$id" == "$target_id" ]]; then
      echo "$cfg"
      return 0
    fi
  done
  return 0
}
|
||||
|
||||
# Find all PIDs of launcher processes for a given agent ID.
# Looks the agent's config path up via config_path_for and searches full
# process command lines for "launcher ... -c ... <config path>".
# Arguments: $1 - agent ID
# Outputs:   newline-separated PIDs (may be empty)
# Returns:   0 always; "no match" is reported as empty output
find_agent_pids() {
  local id="$1"
  local cfg; cfg="$(config_path_for "$id")"
  if [[ -z "$cfg" ]]; then
    return 0
  fi
  # pgrep -f takes an extended regex, so escape metacharacters in the path;
  # otherwise '.', '+', etc. in a directory name act as regex operators.
  local cfg_re
  cfg_re="$(printf '%s' "$cfg" | sed 's/[][\\.^$*+?(){}|]/\\&/g')"
  # pgrep exits 1 when nothing matches; that is not an error here.
  pgrep -f "launcher.*-c.*${cfg_re}" 2>/dev/null || true
}
|
||||
|
||||
# Report whether an agent is running, repairing its PID file as a side effect.
# Arguments: $1 - agent ID
# Returns:   0 if the PID recorded in the PID file is alive, or if a launcher
#            process for the agent is found by find_agent_pids (the PID file
#            is then rewritten with the first PID found); 1 otherwise, after
#            removing any stale PID file.
is_running() {
  local id="$1"

  # First check the PID file. Only a plain positive integer is accepted, so
  # a corrupt file is treated as stale instead of raising an arithmetic error.
  local pid; pid="$(read_pid "$id")"
  if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
    return 0
  fi

  # PID file is stale or missing — search for actual processes.
  local pids; pids="$(find_agent_pids "$id")"
  if [[ -n "$pids" ]]; then
    # Record the first PID found so later PID-file lookups succeed.
    printf '%s\n' "${pids%%$'\n'*}" > "$(pid_file "$id")"
    return 0
  fi

  # Truly not running — drop whatever PID file is left (no-op if absent).
  rm -f -- "$(pid_file "$id")"
  return 1
}
|
||||
|
||||
# Count how many instances of an agent are running.
# Arguments: $1 - agent ID
# Outputs:   the number of PIDs reported by find_agent_pids, as a bare
#            integer ("0" when there are none)
count_instances() {
  local id="$1"
  local pids; pids="$(find_agent_pids "$id")"
  # Count non-empty lines. grep -c prints an unpadded number (wc -l pads with
  # blanks on BSD/macOS) and prints 0 — while exiting 1 — when nothing
  # matches, so `|| true` only neutralises the exit status.
  printf '%s\n' "$pids" | grep -c . || true
}
|
||||
|
||||
agent_status() {
|
||||
|
||||
@@ -26,6 +26,7 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
|
||||
fi
|
||||
|
||||
pid="$(read_pid "$id")"
|
||||
instance_count="$(count_instances "$id")"
|
||||
((found++)) || true
|
||||
|
||||
# Uptime: calcular desde el inicio del proceso
|
||||
@@ -78,6 +79,12 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
|
||||
printf "%-22s ${GRN}%-8s${RST} %-12s %-10s %-8s %s\n" \
|
||||
"$id" "$pid" "$uptime" "$mem" "${cpu_pct}%" "$log_size"
|
||||
|
||||
# Warn about duplicate instances
|
||||
if [[ "$instance_count" -gt 1 ]]; then
|
||||
printf " ${RED}⚠ WARNING: %d instances running!${RST} PIDs: %s\n" \
|
||||
"$instance_count" "$(find_agent_pids "$id" | tr '\n' ' ')"
|
||||
fi
|
||||
|
||||
done < <(list_agents_raw)
|
||||
|
||||
if [[ "$found" -eq 0 ]]; then
|
||||
|
||||
@@ -55,11 +55,14 @@ case "$CMD" in
|
||||
|
||||
killed=0
|
||||
for id in "${agents[@]}"; do
|
||||
pid="$(read_pid "$id")"
|
||||
if [[ "$pid" -gt 0 ]] && kill -0 "$pid" 2>/dev/null; then
|
||||
kill -9 "$pid" 2>/dev/null || true
|
||||
all_pids="$(find_agent_pids "$id")"
|
||||
if [[ -n "$all_pids" ]]; then
|
||||
cnt="$(echo "$all_pids" | wc -l)"
|
||||
for p in $all_pids; do
|
||||
kill -9 "$p" 2>/dev/null || true
|
||||
done
|
||||
rm -f "$(pid_file "$id")"
|
||||
ok "$id killed (PID $pid)"
|
||||
ok "$id killed ($cnt instance(s), PIDs: $(echo $all_pids | tr '\n' ' '))"
|
||||
((killed++)) || true
|
||||
else
|
||||
dim " $id (no estaba corriendo)"
|
||||
|
||||
+21
-2
@@ -14,11 +14,30 @@ start_agent() {
|
||||
local id="$1" cfg="$2"
|
||||
local log; log="$(log_file "$id")"
|
||||
local pid_f; pid_f="$(pid_file "$id")"
|
||||
local bin="$REPO_ROOT/bin/launcher"
|
||||
|
||||
# Check for duplicate instances already running
|
||||
local existing; existing="$(count_instances "$id")"
|
||||
if [[ "$existing" -gt 0 ]]; then
|
||||
warn "$id already has $existing instance(s) running (orphan processes?)"
|
||||
warn " Run ./dev-scripts/stop.sh $id first to clean up"
|
||||
return 1
|
||||
fi
|
||||
|
||||
info "Iniciando $id..."
|
||||
|
||||
# Lanza el launcher en background, desacoplado del terminal
|
||||
nohup "$GO" run -tags goolm ./cmd/launcher -c "$cfg" --log-level "${LOG_LEVEL:-info}" \
|
||||
# Build the binary first to avoid go run wrapper PID issues
|
||||
if [[ ! -x "$bin" ]] || [[ "$(find ./cmd/launcher -newer "$bin" 2>/dev/null | head -1)" ]]; then
|
||||
info "Compilando launcher..."
|
||||
mkdir -p "$(dirname "$bin")"
|
||||
"$GO" build -tags goolm -o "$bin" ./cmd/launcher || {
|
||||
fail "$id error de compilación — revisa el código"
|
||||
return 1
|
||||
}
|
||||
fi
|
||||
|
||||
# Launch the compiled binary directly (no go run wrapper)
|
||||
nohup "$bin" -c "$cfg" --log-level "${LOG_LEVEL:-info}" \
|
||||
>> "$log" 2>&1 &
|
||||
|
||||
local pid=$!
|
||||
|
||||
+22
-8
@@ -18,23 +18,37 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
|
||||
continue
|
||||
fi
|
||||
|
||||
local_pid="$(read_pid "$id")"
|
||||
kill -TERM "$local_pid" 2>/dev/null || true
|
||||
# Kill ALL instances, not just the one in the PID file
all_pids="$(find_agent_pids "$id")"
# Count non-empty lines. grep -c already prints "0" when nothing matches (it
# only *exits* 1), so the fallback must not print a second value: `|| echo 0`
# would produce "0<newline>0" and break the numeric comparison that follows.
instance_count="$(printf '%s\n' "$all_pids" | grep -c . || true)"
|
||||
|
||||
# Espera hasta 5s a que muera limpiamente
|
||||
if [[ "$instance_count" -gt 1 ]]; then
|
||||
warn "$id has $instance_count instances running — stopping all"
|
||||
fi
|
||||
|
||||
# Send SIGTERM to all instances
|
||||
for p in $all_pids; do
|
||||
kill -TERM "$p" 2>/dev/null || true
|
||||
done
|
||||
|
||||
# Wait up to 5s for graceful shutdown
|
||||
for _ in {1..10}; do
|
||||
kill -0 "$local_pid" 2>/dev/null || break
|
||||
remaining="$(find_agent_pids "$id")"
|
||||
[[ -z "$remaining" ]] && break
|
||||
sleep 0.5
|
||||
done
|
||||
|
||||
# SIGKILL si todavía sigue vivo
|
||||
if kill -0 "$local_pid" 2>/dev/null; then
|
||||
# SIGKILL any survivors
|
||||
survivors="$(find_agent_pids "$id")"
|
||||
if [[ -n "$survivors" ]]; then
|
||||
warn "$id no respondió a SIGTERM, enviando SIGKILL..."
|
||||
kill -9 "$local_pid" 2>/dev/null || true
|
||||
for p in $survivors; do
|
||||
kill -9 "$p" 2>/dev/null || true
|
||||
done
|
||||
fi
|
||||
|
||||
rm -f "$(pid_file "$id")"
|
||||
ok "$id detenido (PID $local_pid)"
|
||||
ok "$id detenido ($instance_count instance(s) stopped)"
|
||||
((stopped++)) || true
|
||||
|
||||
done < <(list_agents_raw)
|
||||
|
||||
Reference in New Issue
Block a user