feat: implement server-wide management actions and enhance TUI dashboard

This commit is contained in:
2026-03-04 20:51:02 +00:00
parent 150f9d2990
commit ddec55871b
13 changed files with 621 additions and 52 deletions
+57 -2
View File
@@ -50,9 +50,64 @@ read_pid() {
[[ -f "$f" ]] && cat "$f" || echo 0
}
# Map an agent ID to its config path by scanning agent directories.
#
# Arguments: $1 - agent ID to look up
# Outputs:   the first agents/*/config.yaml whose first line matching the
#            '^ id:' pattern carries that ID; prints nothing if none matches
# Returns:   normally 0 whether or not a match is found, so callers must
#            test for empty output rather than the exit status
config_path_for() {
  local target_id="$1"
  # Both loop variables are local so a direct call cannot clobber a
  # caller's own $cfg or $id.
  local cfg id
  for cfg in agents/*/config.yaml; do
    # An unmatched glob stays as the literal pattern; skip non-files.
    [[ -f "$cfg" ]] || continue
    id=$(grep -m1 '^ id:' "$cfg" | awk '{print $2}')
    if [[ "$id" == "$target_id" ]]; then
      echo "$cfg"
      return
    fi
  done
}
# Find all PIDs of launcher processes for a given agent ID.
#
# Resolves the agent's config path and searches process command lines for
# a launcher invoked with "-c <that path>".
#
# Arguments: $1 - agent ID
# Outputs:   newline-separated PIDs (may be empty)
# Returns:   always 0; "no config" and "no process" both yield empty output
find_agent_pids() {
  local id="$1"
  local cfg
  cfg="$(config_path_for "$id")"
  [[ -n "$cfg" ]] || return 0

  # pgrep -f treats its argument as a regular expression. Escape the path so
  # that "." in "config.yaml" (or any metacharacter in a directory name) is
  # matched literally instead of widening the search.
  local cfg_re
  cfg_re="$(printf '%s' "$cfg" | sed 's/[][\.*^$+?(){}|]/\\&/g')"

  # pgrep exits non-zero when nothing matches; that is a normal result here.
  pgrep -f "launcher.*-c.*${cfg_re}" 2>/dev/null || true
}
# Report whether an agent is running, repairing its PID file along the way.
#
# Arguments: $1 - agent ID
# Returns:   0 if the PID file names a live process, or if find_agent_pids
#            reports at least one process for the agent; 1 otherwise
# Side effects:
#   - when the recorded PID is stale or missing but processes are found,
#     the PID file is rewritten with the first PID found
#   - when nothing is running and a PID had been recorded, the PID file
#     is removed
is_running() {
  local id="$1"
  local pid
  pid="$(read_pid "$id")"

  # Fast path: the recorded PID is alive.
  if [[ "$pid" -gt 0 ]] && kill -0 "$pid" 2>/dev/null; then
    return 0
  fi

  # PID file is stale or missing — search for actual processes.
  local pids
  pids="$(find_agent_pids "$id")"
  if [[ -n "$pids" ]]; then
    # Re-point the PID file at the first process found (first output line).
    printf '%s\n' "${pids%%$'\n'*}" > "$(pid_file "$id")"
    return 0
  fi

  # Truly not running — drop the stale PID file if one had been recorded.
  if [[ "$pid" -gt 0 ]]; then
    rm -f "$(pid_file "$id")"
  fi
  return 1
}
# Count how many instances of an agent are running.
#
# Arguments: $1 - agent ID
# Outputs:   the number of PIDs reported by find_agent_pids, as a bare
#            integer with no padding (0 when none are found)
count_instances() {
  local id="$1"
  local pids
  pids="$(find_agent_pids "$id")"
  if [[ -z "$pids" ]]; then
    echo 0
    return 0
  fi
  # grep -c prints an unpadded integer on every platform (BSD wc -l pads with
  # spaces, which leaks into messages) and ignores blank lines. "|| true"
  # keeps a zero count from being reported as a failure.
  printf '%s\n' "$pids" | grep -c . || true
}
agent_status() {
+7
View File
@@ -26,6 +26,7 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
fi
pid="$(read_pid "$id")"
instance_count="$(count_instances "$id")"
((found++)) || true
# Uptime: calcular desde el inicio del proceso
@@ -78,6 +79,12 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
printf "%-22s ${GRN}%-8s${RST} %-12s %-10s %-8s %s\n" \
"$id" "$pid" "$uptime" "$mem" "${cpu_pct}%" "$log_size"
# Warn about duplicate instances
if [[ "$instance_count" -gt 1 ]]; then
printf " ${RED}⚠ WARNING: %d instances running!${RST} PIDs: %s\n" \
"$instance_count" "$(find_agent_pids "$id" | tr '\n' ' ')"
fi
done < <(list_agents_raw)
if [[ "$found" -eq 0 ]]; then
+7 -4
View File
@@ -55,11 +55,14 @@ case "$CMD" in
killed=0
for id in "${agents[@]}"; do
pid="$(read_pid "$id")"
if [[ "$pid" -gt 0 ]] && kill -0 "$pid" 2>/dev/null; then
kill -9 "$pid" 2>/dev/null || true
all_pids="$(find_agent_pids "$id")"
if [[ -n "$all_pids" ]]; then
cnt="$(echo "$all_pids" | wc -l)"
for p in $all_pids; do
kill -9 "$p" 2>/dev/null || true
done
rm -f "$(pid_file "$id")"
ok "$id killed (PID $pid)"
ok "$id killed ($cnt instance(s), PIDs: $(echo $all_pids | tr '\n' ' '))"
((killed++)) || true
else
dim " $id (no estaba corriendo)"
+21 -2
View File
@@ -14,11 +14,30 @@ start_agent() {
local id="$1" cfg="$2"
local log; log="$(log_file "$id")"
local pid_f; pid_f="$(pid_file "$id")"
local bin="$REPO_ROOT/bin/launcher"
# Check for duplicate instances already running
local existing; existing="$(count_instances "$id")"
if [[ "$existing" -gt 0 ]]; then
warn "$id already has $existing instance(s) running (orphan processes?)"
warn " Run ./dev-scripts/stop.sh $id first to clean up"
return 1
fi
info "Iniciando $id..."
# Lanza el launcher en background, desacoplado del terminal
nohup "$GO" run -tags goolm ./cmd/launcher -c "$cfg" --log-level "${LOG_LEVEL:-info}" \
# Build the binary first to avoid go run wrapper PID issues
if [[ ! -x "$bin" ]] || [[ "$(find ./cmd/launcher -newer "$bin" 2>/dev/null | head -1)" ]]; then
info "Compilando launcher..."
mkdir -p "$(dirname "$bin")"
"$GO" build -tags goolm -o "$bin" ./cmd/launcher || {
fail "$id error de compilación — revisa el código"
return 1
}
fi
# Launch the compiled binary directly (no go run wrapper)
nohup "$bin" -c "$cfg" --log-level "${LOG_LEVEL:-info}" \
>> "$log" 2>&1 &
local pid=$!
+22 -8
View File
@@ -18,23 +18,37 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
continue
fi
local_pid="$(read_pid "$id")"
kill -TERM "$local_pid" 2>/dev/null || true
# Kill ALL instances, not just the one in the PID file
all_pids="$(find_agent_pids "$id")"
instance_count="$(echo "$all_pids" | grep -c . 2>/dev/null || echo 0)"
# Espera hasta 5s a que muera limpiamente
if [[ "$instance_count" -gt 1 ]]; then
warn "$id has $instance_count instances running — stopping all"
fi
# Send SIGTERM to all instances
for p in $all_pids; do
kill -TERM "$p" 2>/dev/null || true
done
# Wait up to 5s for graceful shutdown
for _ in {1..10}; do
kill -0 "$local_pid" 2>/dev/null || break
remaining="$(find_agent_pids "$id")"
[[ -z "$remaining" ]] && break
sleep 0.5
done
# SIGKILL si todavía sigue vivo
if kill -0 "$local_pid" 2>/dev/null; then
# SIGKILL any survivors
survivors="$(find_agent_pids "$id")"
if [[ -n "$survivors" ]]; then
warn "$id no respondió a SIGTERM, enviando SIGKILL..."
kill -9 "$local_pid" 2>/dev/null || true
for p in $survivors; do
kill -9 "$p" 2>/dev/null || true
done
fi
rm -f "$(pid_file "$id")"
ok "$id detenido (PID $local_pid)"
ok "$id detenido ($instance_count instance(s) stopped)"
((stopped++)) || true
done < <(list_agents_raw)