chore: sync from fn-registry agent
This commit is contained in:
@@ -0,0 +1 @@
|
||||
registry.db
|
||||
@@ -0,0 +1,94 @@
|
||||
---
|
||||
name: services_api
|
||||
lang: go
|
||||
domain: infra
|
||||
version: 0.1.0
|
||||
description: "Backend HTTP que monitoriza apps con tag 'service' en el PC local y en PCs remotos via SSH. Loop de checks cada 15s, persistencia en operations.db. Frontend dedicado: app C++ services_monitor. Issue 0106."
|
||||
tags: [service, monitoring, sqlite, ssh, systemd, http]
|
||||
uses_functions:
|
||||
- sqlite_apply_versioned_migrations_go_infra
|
||||
- ssh_exec_go_infra
|
||||
uses_types:
|
||||
- ssh_conn_go_infra
|
||||
framework: "net/http"
|
||||
entry_point: "main.go"
|
||||
dir_path: "apps/services_api"
|
||||
service:
|
||||
port: 8485
|
||||
health_endpoint: /api/health
|
||||
health_timeout_s: 3
|
||||
systemd_unit: services_api.service
|
||||
systemd_scope: user
|
||||
restart_policy: always
|
||||
runtime: systemd-user
|
||||
pc_targets:
|
||||
- aurgi-pc
|
||||
- home-wsl
|
||||
is_local_only: false
|
||||
---
|
||||
|
||||
# services_api
|
||||
|
||||
## Que hace
|
||||
|
||||
Monitor cross-PC de los services del registry (issue 0106).
|
||||
|
||||
1. Lee `registry.db` (read-only) → apps con `tag: service` + `service_targets`.
|
||||
2. Loop cada 15s: por cada (app, pc) chequea systemd + port listening + HTTP health.
|
||||
3. Local (pc == self): `systemctl is-active`, TCP dial, `http.Client`.
|
||||
4. Remoto: mismo set de comandos a traves de `ssh_exec_go_infra`. Si no hay ruta SSH → `no-route`.
|
||||
5. Persistencia en `operations.db`: `service_state` (estado actual) + `service_transition` (cambios).
|
||||
|
||||
## Cuando usarla
|
||||
|
||||
- Saber de un vistazo que services estan vivos en cada PC.
|
||||
- Detectar caidas silenciosas (ej. `sqlite_api` murio 20h sin alerta el 2026-05-17).
|
||||
- Auditar cobertura por PC antes de un deploy.
|
||||
|
||||
## Lanzar
|
||||
|
||||
```bash
|
||||
cd apps/services_api
|
||||
CGO_ENABLED=1 go build -tags fts5 -o services_api .
|
||||
./services_api --bind 127.0.0.1:8485
|
||||
# o ./services_api --once para un solo ciclo (smoke)
|
||||
```
|
||||
|
||||
Abrir `http://127.0.0.1:8485` en el navegador.
|
||||
|
||||
## Endpoints
|
||||
|
||||
| Ruta | Que devuelve |
|
||||
|---|---|
|
||||
| `GET /api/health` | `{"status":"ok","self_pc":"..."}` |
|
||||
| `GET /api/services` | Snapshot completo (`services[]`) + `self_pc` + `ts` |
|
||||
| `POST /api/check` | Fuerza un ciclo de checks; bloquea hasta completar |
|
||||
| `GET /api/pcs` | Lista PCs con `services_count` |
|
||||
|
||||
## Gotchas
|
||||
|
||||
- **PCs remotos sin entrada en `~/.ssh/config`** quedan en `no-route`/`unknown`. Anadir alias antes de esperar status real (ej. `aurgi-pc` no esta en config hoy).
|
||||
- **Solo lectura de registry.db**; no escribe en el. Las migraciones son sobre la propia operations.db de la app (`apps/services_api/operations.db`, tabla `service_state` + `service_transition`).
|
||||
- **`overall`** se computa por runtime:
|
||||
- `systemd-*` / `stdio`: requiere `systemctl is-active == active`. Si hay puerto, ademas TCP + HTTP 2xx/3xx.
|
||||
- `docker-compose` / `manual`: solo puerto + HTTP.
|
||||
- **Reload de targets** cada 5 min (background) sin reiniciar el proceso. Cambios en `app.md` requieren `./fn index` primero.
|
||||
- **No expone tools mutadoras** en v1 (start/stop/restart). Solo alerta. Issue 0106 marca auto-fix detras de feature flag para v2.
|
||||
|
||||
## Validacion
|
||||
|
||||
```bash
|
||||
./services_api --once && \
|
||||
sqlite3 operations.db \
|
||||
"SELECT app_id, pc_id, overall FROM service_state ORDER BY app_id, pc_id;"
|
||||
```
|
||||
|
||||
|
||||
## Capability growth log
|
||||
|
||||
Una linea por bump SemVer. Bump-type segun `.claude/commands/version.md`:
|
||||
- `major`: breaking observable (CLI args, schema BBDD propia, formato wire).
|
||||
- `minor`: feature aditiva (nuevo panel, endpoint, opcion).
|
||||
- `patch`: bugfix sin cambio observable.
|
||||
|
||||
- v0.1.0 (2026-05-18) — baseline.
|
||||
@@ -0,0 +1,532 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"fn-registry/functions/infra"
|
||||
)
|
||||
|
||||
// Target is one (app, pc) combination derived from registry.db apps tagged
|
||||
// service + service_targets.
|
||||
type Target struct {
|
||||
AppID string
|
||||
Name string
|
||||
Runtime string
|
||||
Port int
|
||||
HealthEndpoint string
|
||||
HealthTimeoutS int
|
||||
SystemdUnit string
|
||||
SystemdScope string
|
||||
IsLocalOnly bool
|
||||
PCID string
|
||||
}
|
||||
|
||||
// ServiceCheck is the result of one probe of a target.
|
||||
type ServiceCheck struct {
|
||||
AppID string
|
||||
PCID string
|
||||
SystemdState string
|
||||
PortListening bool
|
||||
HTTPStatus int
|
||||
HTTPLatencyMs int
|
||||
Error string
|
||||
Overall string
|
||||
}
|
||||
|
||||
// loadTargets queries registry.db for the cross product of services x pc_targets.
|
||||
func loadTargets(registryRoot string) ([]Target, error) {
|
||||
dbPath := registryRoot + "/registry.db"
|
||||
db, err := sql.Open("sqlite3", dbPath+"?mode=ro&_journal_mode=WAL")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open registry.db: %w", err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
rows, err := db.Query(`
|
||||
SELECT a.id, a.name,
|
||||
COALESCE(a.service_runtime,''),
|
||||
COALESCE(a.service_port,0),
|
||||
COALESCE(a.service_health_endpoint,''),
|
||||
COALESCE(a.service_health_timeout_s,3),
|
||||
COALESCE(a.service_systemd_unit,''),
|
||||
COALESCE(a.service_systemd_scope,''),
|
||||
COALESCE(a.service_is_local_only,0),
|
||||
t.pc_id
|
||||
FROM apps a
|
||||
JOIN service_targets t ON t.app_id = a.id
|
||||
WHERE a.tags LIKE '%service%'
|
||||
ORDER BY a.name, t.pc_id
|
||||
`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query targets: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var out []Target
|
||||
for rows.Next() {
|
||||
var t Target
|
||||
var localOnly int
|
||||
if err := rows.Scan(
|
||||
&t.AppID, &t.Name, &t.Runtime, &t.Port, &t.HealthEndpoint, &t.HealthTimeoutS,
|
||||
&t.SystemdUnit, &t.SystemdScope, &localOnly, &t.PCID,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
t.IsLocalOnly = localOnly != 0
|
||||
if t.HealthTimeoutS <= 0 {
|
||||
t.HealthTimeoutS = 3
|
||||
}
|
||||
out = append(out, t)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// probeTarget probes one target and returns a ServiceCheck.
|
||||
func probeTarget(t Target, selfPC string) ServiceCheck {
|
||||
c := ServiceCheck{AppID: t.AppID, PCID: t.PCID}
|
||||
|
||||
// Local check only when the target lives on selfPC. Remote PCs always go
|
||||
// over SSH; if SSH has no route the probe returns "no-route" — that is
|
||||
// exactly what we want to surface "PC unreachable" upstream.
|
||||
if t.PCID == selfPC {
|
||||
probeLocal(&c, t)
|
||||
} else {
|
||||
probeRemote(&c, t)
|
||||
}
|
||||
c.Overall = computeOverall(t, c)
|
||||
return c
|
||||
}
|
||||
|
||||
func probeLocal(c *ServiceCheck, t Target) {
|
||||
// systemd state.
|
||||
if t.SystemdUnit != "" {
|
||||
c.SystemdState = systemctlIsActiveLocal(t.SystemdUnit, t.SystemdScope)
|
||||
} else {
|
||||
c.SystemdState = "n/a"
|
||||
}
|
||||
|
||||
// Port listening (TCP localhost).
|
||||
if t.Port > 0 {
|
||||
c.PortListening = portListeningLocal(t.Port, time.Duration(t.HealthTimeoutS)*time.Second)
|
||||
}
|
||||
|
||||
// HTTP health.
|
||||
if t.Port > 0 && t.HealthEndpoint != "" {
|
||||
status, latency, err := httpProbe("127.0.0.1", t.Port, t.HealthEndpoint, time.Duration(t.HealthTimeoutS)*time.Second)
|
||||
c.HTTPStatus = status
|
||||
c.HTTPLatencyMs = latency
|
||||
if err != nil {
|
||||
c.Error = err.Error()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func probeRemote(c *ServiceCheck, t Target) {
|
||||
conn := infra.SSHConn{Host: t.PCID}
|
||||
|
||||
// docker-compose runtime: state via `docker compose ls`, no systemctl.
|
||||
if t.Runtime == "docker-compose" {
|
||||
probeDockerComposeRemote(c, t, conn)
|
||||
return
|
||||
}
|
||||
|
||||
// systemd state via ssh.
|
||||
if t.SystemdUnit != "" {
|
||||
cmd := fmt.Sprintf("systemctl is-active %s", shellEscape(t.SystemdUnit))
|
||||
if t.SystemdScope == "user" {
|
||||
cmd = "systemctl --user is-active " + shellEscape(t.SystemdUnit)
|
||||
}
|
||||
stdout, stderr, code, err := infra.SSHExec(conn, cmd)
|
||||
// SSH transport failure (ssh exit 255 = resolve/connect refused/auth)
|
||||
// or local exec error → mark unreachable. systemctl exit codes 3 ("inactive"),
|
||||
// 4 ("unit not found") are still semantic answers from a reachable host.
|
||||
if err != nil || code == 255 || code < 0 {
|
||||
c.SystemdState = "no-route"
|
||||
detail := strings.TrimSpace(stderr)
|
||||
if err != nil {
|
||||
detail = err.Error()
|
||||
}
|
||||
c.Error = detail
|
||||
return
|
||||
}
|
||||
// systemctl returns "inactive" exit 3 when unit is stopped OR missing.
|
||||
// Distinguish via stderr (unit not found).
|
||||
stderrLower := strings.ToLower(stderr)
|
||||
if strings.Contains(stderrLower, "could not be found") ||
|
||||
strings.Contains(stderrLower, "not-found") ||
|
||||
strings.Contains(stderrLower, "not found") {
|
||||
c.SystemdState = "not-installed"
|
||||
return
|
||||
}
|
||||
c.SystemdState = strings.TrimSpace(stdout)
|
||||
if c.SystemdState == "" {
|
||||
c.SystemdState = "unknown"
|
||||
}
|
||||
} else {
|
||||
c.SystemdState = "n/a"
|
||||
}
|
||||
|
||||
if t.Port > 0 {
|
||||
// Port listening via ss on remote.
|
||||
cmd := fmt.Sprintf("ss -tln '( sport = :%d )' | tail -n +2 | head -n1", t.Port)
|
||||
stdout, _, _, err := infra.SSHExec(conn, cmd)
|
||||
if err == nil && strings.TrimSpace(stdout) != "" {
|
||||
c.PortListening = true
|
||||
}
|
||||
|
||||
// HTTP probe via curl on remote.
|
||||
if t.HealthEndpoint != "" {
|
||||
url := fmt.Sprintf("http://127.0.0.1:%d%s", t.Port, t.HealthEndpoint)
|
||||
fmtCmd := `curl -s -o /dev/null -w "%{http_code} %{time_total}" -m ` +
|
||||
strconv.Itoa(t.HealthTimeoutS) + " " + shellEscape(url)
|
||||
stdout, stderr, _, err := infra.SSHExec(conn, fmtCmd)
|
||||
if err != nil {
|
||||
c.Error = strings.TrimSpace(stderr)
|
||||
return
|
||||
}
|
||||
parts := strings.Fields(strings.TrimSpace(stdout))
|
||||
if len(parts) >= 1 {
|
||||
if code, perr := strconv.Atoi(parts[0]); perr == nil {
|
||||
c.HTTPStatus = code
|
||||
}
|
||||
}
|
||||
if len(parts) >= 2 {
|
||||
if secs, perr := strconv.ParseFloat(parts[1], 64); perr == nil {
|
||||
c.HTTPLatencyMs = int(secs * 1000)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func computeOverall(t Target, c ServiceCheck) string {
|
||||
if strings.HasPrefix(c.SystemdState, "no-route") {
|
||||
return "no-route"
|
||||
}
|
||||
if c.SystemdState == "not-installed" {
|
||||
return "not-installed"
|
||||
}
|
||||
|
||||
// When the probe returned a systemd is-active style answer, route through
|
||||
// the systemd branch regardless of the declared runtime. This covers the
|
||||
// case where an app advertises `runtime: docker-compose` (because it lives
|
||||
// on the primary VPS) but also runs as a bare systemd unit on a developer
|
||||
// PC. The PC-local probe always asks systemctl; trust its output.
|
||||
switch c.SystemdState {
|
||||
case "active", "inactive", "failed", "activating", "deactivating", "reloading", "unknown", "n/a":
|
||||
// fallthrough to systemd-style aggregation below
|
||||
default:
|
||||
if t.Runtime == "docker-compose" {
|
||||
if c.SystemdState == "" {
|
||||
return "unknown"
|
||||
}
|
||||
ran, exited, _ := parseComposeCounts(c.SystemdState)
|
||||
if ran == 0 {
|
||||
return "down"
|
||||
}
|
||||
if exited > 0 {
|
||||
return "degraded"
|
||||
}
|
||||
if t.Port > 0 && t.HealthEndpoint != "" &&
|
||||
(c.HTTPStatus > 0 && (c.HTTPStatus < 200 || c.HTTPStatus >= 400)) {
|
||||
return "degraded"
|
||||
}
|
||||
return "ok"
|
||||
}
|
||||
}
|
||||
|
||||
// Systemd-runtime apps.
|
||||
if strings.HasPrefix(t.Runtime, "systemd-") || t.Runtime == "stdio" || c.SystemdState == "active" || c.SystemdState == "inactive" || c.SystemdState == "failed" {
|
||||
if c.SystemdState != "active" {
|
||||
return "down"
|
||||
}
|
||||
if t.Port > 0 {
|
||||
if !c.PortListening {
|
||||
return "degraded"
|
||||
}
|
||||
if t.HealthEndpoint != "" && (c.HTTPStatus < 200 || c.HTTPStatus >= 400) {
|
||||
return "degraded"
|
||||
}
|
||||
}
|
||||
return "ok"
|
||||
}
|
||||
|
||||
// manual / unknown runtime: lean on port + http only.
|
||||
if t.Port > 0 {
|
||||
if !c.PortListening {
|
||||
return "down"
|
||||
}
|
||||
if t.HealthEndpoint != "" && (c.HTTPStatus < 200 || c.HTTPStatus >= 400) {
|
||||
return "degraded"
|
||||
}
|
||||
return "ok"
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
// parseComposeCounts parses a docker-compose `ls` status string like
|
||||
// "running(10)", "running(8) | exited(2)" and returns (running, exited, total).
|
||||
// Tolerant: unknown tokens map to 0.
|
||||
func parseComposeCounts(s string) (running, exited, total int) {
|
||||
parts := strings.Split(s, "|")
|
||||
for _, p := range parts {
|
||||
p = strings.TrimSpace(p)
|
||||
// Format: kind(N)
|
||||
open := strings.Index(p, "(")
|
||||
closeIdx := strings.Index(p, ")")
|
||||
if open == -1 || closeIdx == -1 || closeIdx < open {
|
||||
continue
|
||||
}
|
||||
kind := strings.TrimSpace(p[:open])
|
||||
n, err := strconv.Atoi(strings.TrimSpace(p[open+1 : closeIdx]))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
switch kind {
|
||||
case "running":
|
||||
running += n
|
||||
case "exited", "stopped", "dead":
|
||||
exited += n
|
||||
}
|
||||
total += n
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// systemctlIsActiveLocal runs `systemctl [--user] is-active <unit>` and returns
|
||||
// a normalized state. Distinguishes "not-installed" (unit file absent) from
|
||||
// "inactive" (unit present but stopped). Both are unhealthy but mean different
|
||||
// things for the human: not-installed = forget systemctl, deploy the unit.
|
||||
func systemctlIsActiveLocal(unit, scope string) string {
|
||||
args := []string{"is-active", unit}
|
||||
if scope == "user" {
|
||||
args = append([]string{"--user"}, args...)
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "systemctl", args...)
|
||||
var so, se strings.Builder
|
||||
cmd.Stdout = &so
|
||||
cmd.Stderr = &se
|
||||
_ = cmd.Run()
|
||||
state := strings.TrimSpace(so.String())
|
||||
stderr := strings.ToLower(se.String())
|
||||
if strings.Contains(stderr, "could not be found") ||
|
||||
strings.Contains(stderr, "not-found") ||
|
||||
strings.Contains(stderr, "not found") {
|
||||
return "not-installed"
|
||||
}
|
||||
if state == "" {
|
||||
return "unknown"
|
||||
}
|
||||
return state
|
||||
}
|
||||
|
||||
func portListeningLocal(port int, timeout time.Duration) bool {
|
||||
conn, err := net.DialTimeout("tcp", fmt.Sprintf("127.0.0.1:%d", port), timeout)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
conn.Close()
|
||||
return true
|
||||
}
|
||||
|
||||
func httpProbe(host string, port int, path string, timeout time.Duration) (int, int, error) {
|
||||
client := &http.Client{Timeout: timeout}
|
||||
url := fmt.Sprintf("http://%s:%d%s", host, port, path)
|
||||
start := time.Now()
|
||||
resp, err := client.Get(url)
|
||||
latencyMs := int(time.Since(start) / time.Millisecond)
|
||||
if err != nil {
|
||||
return 0, latencyMs, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
return resp.StatusCode, latencyMs, nil
|
||||
}
|
||||
|
||||
// probeDockerComposeRemote queries `docker compose ls --format json` over SSH
|
||||
// and maps the running/expected ratio into a synthetic systemd_state-like value
|
||||
// + port/HTTP probes already declared.
|
||||
//
|
||||
// Project name defaults to t.Name (the app's frontmatter `name`). Override
|
||||
// later via a `docker_compose_project` field if needed.
|
||||
func probeDockerComposeRemote(c *ServiceCheck, t Target, conn infra.SSHConn) {
|
||||
project := t.Name
|
||||
|
||||
// `docker compose ls --format json` returns array of {Name, Status, ConfigFiles}.
|
||||
// Status string examples: "running(10)", "running(8) | exited(2)", "exited(5)".
|
||||
cmd := "docker compose ls --all --format json"
|
||||
stdout, stderr, code, err := infra.SSHExec(conn, cmd)
|
||||
if err != nil || code != 0 {
|
||||
c.SystemdState = "no-route"
|
||||
detail := strings.TrimSpace(stderr)
|
||||
if err != nil {
|
||||
detail = err.Error()
|
||||
}
|
||||
c.Error = detail
|
||||
return
|
||||
}
|
||||
|
||||
// Tolerant parse: find the JSON object whose "Name" matches the project.
|
||||
// Try variants: the app's name, hyphenized, and slashes-stripped.
|
||||
candidates := []string{project, strings.ReplaceAll(project, "_", "-")}
|
||||
var status string
|
||||
var found bool
|
||||
var matched string
|
||||
for _, cand := range candidates {
|
||||
if cand == "" {
|
||||
continue
|
||||
}
|
||||
if s, ok := extractComposeStatus(stdout, cand); ok {
|
||||
status, found, matched = s, true, cand
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
c.SystemdState = "not-installed"
|
||||
c.Error = "no docker compose project named " + project +
|
||||
" (tried " + strings.Join(candidates, ", ") + ")"
|
||||
return
|
||||
}
|
||||
_ = matched // reserved for future telemetry
|
||||
|
||||
c.SystemdState = status
|
||||
if t.Port > 0 {
|
||||
probeRemotePort(c, t, conn)
|
||||
}
|
||||
}
|
||||
|
||||
// extractComposeStatus parses the JSON array emitted by
|
||||
// `docker compose ls --format json` and returns the Status field of the entry
|
||||
// with Name == project. Tolerant to whitespace and field ordering.
|
||||
func extractComposeStatus(jsonBlob, project string) (string, bool) {
|
||||
// Naive but reliable: find `"Name":"<project>"` and the nearest
|
||||
// `"Status":"..."` field within the same object.
|
||||
needle := `"Name":"` + project + `"`
|
||||
idx := strings.Index(jsonBlob, needle)
|
||||
if idx == -1 {
|
||||
// Try alternative: "Name": "project" with space.
|
||||
needle2 := `"Name": "` + project + `"`
|
||||
idx = strings.Index(jsonBlob, needle2)
|
||||
if idx == -1 {
|
||||
return "", false
|
||||
}
|
||||
}
|
||||
// Bound the search window to the surrounding object.
|
||||
open := strings.LastIndex(jsonBlob[:idx], "{")
|
||||
close := strings.Index(jsonBlob[idx:], "}")
|
||||
if open == -1 || close == -1 {
|
||||
return "", false
|
||||
}
|
||||
obj := jsonBlob[open : idx+close+1]
|
||||
// Find "Status":"..."
|
||||
const stKey = `"Status":`
|
||||
si := strings.Index(obj, stKey)
|
||||
if si == -1 {
|
||||
return "", false
|
||||
}
|
||||
// Skip whitespace and opening quote.
|
||||
rest := obj[si+len(stKey):]
|
||||
rest = strings.TrimLeft(rest, " \t")
|
||||
if !strings.HasPrefix(rest, `"`) {
|
||||
return "", false
|
||||
}
|
||||
rest = rest[1:]
|
||||
end := strings.Index(rest, `"`)
|
||||
if end == -1 {
|
||||
return "", false
|
||||
}
|
||||
return rest[:end], true
|
||||
}
|
||||
|
||||
// probeRemotePort runs the port + http probes when a docker compose project
|
||||
// declares a port that should be reachable on the remote host's loopback.
|
||||
func probeRemotePort(c *ServiceCheck, t Target, conn infra.SSHConn) {
|
||||
cmd := fmt.Sprintf("ss -tln '( sport = :%d )' | tail -n +2 | head -n1", t.Port)
|
||||
stdout, _, _, err := infra.SSHExec(conn, cmd)
|
||||
if err == nil && strings.TrimSpace(stdout) != "" {
|
||||
c.PortListening = true
|
||||
}
|
||||
if t.HealthEndpoint == "" {
|
||||
return
|
||||
}
|
||||
url := fmt.Sprintf("http://127.0.0.1:%d%s", t.Port, t.HealthEndpoint)
|
||||
fmtCmd := `curl -s -o /dev/null -w "%{http_code} %{time_total}" -m ` +
|
||||
strconv.Itoa(t.HealthTimeoutS) + " " + shellEscape(url)
|
||||
stdout, stderr, _, err := infra.SSHExec(conn, fmtCmd)
|
||||
if err != nil {
|
||||
c.Error = strings.TrimSpace(stderr)
|
||||
return
|
||||
}
|
||||
parts := strings.Fields(strings.TrimSpace(stdout))
|
||||
if len(parts) >= 1 {
|
||||
if code, perr := strconv.Atoi(parts[0]); perr == nil {
|
||||
c.HTTPStatus = code
|
||||
}
|
||||
}
|
||||
if len(parts) >= 2 {
|
||||
if secs, perr := strconv.ParseFloat(parts[1], 64); perr == nil {
|
||||
c.HTTPLatencyMs = int(secs * 1000)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// restartTarget runs `systemctl [--user] restart <unit>` locally or via SSH
|
||||
// depending on whether the target lives on selfPC or a remote PC.
|
||||
//
|
||||
// Returns (stdout, stderr, exit_code, transport_error).
|
||||
// - transport_error != nil when ssh / exec failed to even run (DNS, network).
|
||||
// In that case exit_code is -1.
|
||||
// - exit_code != 0 when the command ran but systemctl rejected the request
|
||||
// (unit not found, permission denied, etc.). transport_error is nil.
|
||||
func restartTarget(t Target, selfPC string) (string, string, int, error) {
|
||||
if t.SystemdUnit == "" {
|
||||
return "", "no systemd_unit declared in app.md `service:` block", -1,
|
||||
fmt.Errorf("no systemd_unit")
|
||||
}
|
||||
|
||||
if t.PCID == selfPC || t.IsLocalOnly {
|
||||
args := []string{"restart", t.SystemdUnit}
|
||||
if t.SystemdScope == "user" {
|
||||
args = append([]string{"--user"}, args...)
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "systemctl", args...)
|
||||
var so, se strings.Builder
|
||||
cmd.Stdout = &so
|
||||
cmd.Stderr = &se
|
||||
err := cmd.Run()
|
||||
code := 0
|
||||
if exitErr, ok := err.(*exec.ExitError); ok {
|
||||
code = exitErr.ExitCode()
|
||||
err = nil
|
||||
} else if err != nil {
|
||||
return so.String(), se.String(), -1, err
|
||||
}
|
||||
return so.String(), se.String(), code, nil
|
||||
}
|
||||
|
||||
// Remote via SSH.
|
||||
conn := infra.SSHConn{Host: t.PCID}
|
||||
cmd := fmt.Sprintf("systemctl restart %s", shellEscape(t.SystemdUnit))
|
||||
if t.SystemdScope == "user" {
|
||||
cmd = "systemctl --user restart " + shellEscape(t.SystemdUnit)
|
||||
}
|
||||
stdout, stderr, code, err := infra.SSHExec(conn, cmd)
|
||||
return stdout, stderr, code, err
|
||||
}
|
||||
|
||||
func shellEscape(s string) string {
|
||||
// Conservative single-quote escape for arguments embedded in remote commands.
|
||||
return "'" + strings.ReplaceAll(s, "'", "'\\''") + "'"
|
||||
}
|
||||
@@ -0,0 +1,145 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"embed"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"fn-registry/functions/infra"
|
||||
)
|
||||
|
||||
//go:embed migrations/*.sql
|
||||
var migrationsFS embed.FS
|
||||
|
||||
// openOpsDB opens the operations.db and applies pending migrations.
|
||||
func openOpsDB(path string) (*sql.DB, error) {
|
||||
db, err := sql.Open("sqlite3", path+"?_journal_mode=WAL&_busy_timeout=5000")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open ops db: %w", err)
|
||||
}
|
||||
if err := infra.ApplyVersionedMigrations(db, migrationsFS, "migrations"); err != nil {
|
||||
db.Close()
|
||||
return nil, fmt.Errorf("apply migrations: %w", err)
|
||||
}
|
||||
return db, nil
|
||||
}
|
||||
|
||||
// upsertState writes the latest check into service_state. When `overall` changed
|
||||
// vs previous row, also appends a row to service_transition for audit.
|
||||
func upsertState(db *sql.DB, c ServiceCheck) error {
|
||||
now := time.Now().Unix()
|
||||
|
||||
var prevOverall string
|
||||
row := db.QueryRow(
|
||||
"SELECT overall FROM service_state WHERE app_id = ? AND pc_id = ?",
|
||||
c.AppID, c.PCID,
|
||||
)
|
||||
if err := row.Scan(&prevOverall); err != nil && err != sql.ErrNoRows {
|
||||
return err
|
||||
}
|
||||
|
||||
changeTS := now
|
||||
if prevOverall == c.Overall {
|
||||
// Preserve last_change_ts.
|
||||
var oldChange int64
|
||||
_ = db.QueryRow(
|
||||
"SELECT last_change_ts FROM service_state WHERE app_id = ? AND pc_id = ?",
|
||||
c.AppID, c.PCID,
|
||||
).Scan(&oldChange)
|
||||
if oldChange > 0 {
|
||||
changeTS = oldChange
|
||||
}
|
||||
}
|
||||
|
||||
listening := 0
|
||||
if c.PortListening {
|
||||
listening = 1
|
||||
}
|
||||
|
||||
_, err := db.Exec(`
|
||||
INSERT INTO service_state
|
||||
(app_id, pc_id, systemd_state, port_listening, http_status, http_latency_ms,
|
||||
last_check_ts, last_change_ts, last_error, overall)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(app_id, pc_id) DO UPDATE SET
|
||||
systemd_state = excluded.systemd_state,
|
||||
port_listening = excluded.port_listening,
|
||||
http_status = excluded.http_status,
|
||||
http_latency_ms = excluded.http_latency_ms,
|
||||
last_check_ts = excluded.last_check_ts,
|
||||
last_change_ts = excluded.last_change_ts,
|
||||
last_error = excluded.last_error,
|
||||
overall = excluded.overall
|
||||
`, c.AppID, c.PCID, c.SystemdState, listening, c.HTTPStatus, c.HTTPLatencyMs,
|
||||
now, changeTS, c.Error, c.Overall)
|
||||
if err != nil {
|
||||
return fmt.Errorf("upsert state: %w", err)
|
||||
}
|
||||
|
||||
if prevOverall != "" && prevOverall != c.Overall {
|
||||
_, err = db.Exec(`
|
||||
INSERT INTO service_transition (ts, app_id, pc_id, from_state, to_state, detail)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
`, now, c.AppID, c.PCID, prevOverall, c.Overall, c.Error)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// StateRow is one row of /api/services output.
|
||||
type StateRow struct {
|
||||
AppID string `json:"app_id"`
|
||||
AppName string `json:"app_name"`
|
||||
PCID string `json:"pc_id"`
|
||||
IsSelf bool `json:"is_self"`
|
||||
Reachable bool `json:"reachable"`
|
||||
Runtime string `json:"runtime"`
|
||||
Port int `json:"port"`
|
||||
HealthEndpoint string `json:"health_endpoint"`
|
||||
SystemdUnit string `json:"systemd_unit"`
|
||||
SystemdScope string `json:"systemd_scope"`
|
||||
SystemdState string `json:"systemd_state"`
|
||||
PortListening bool `json:"port_listening"`
|
||||
HTTPStatus int `json:"http_status"`
|
||||
HTTPLatencyMs int `json:"http_latency_ms"`
|
||||
LastCheckTS int64 `json:"last_check_ts"`
|
||||
LastChangeTS int64 `json:"last_change_ts"`
|
||||
LastError string `json:"last_error"`
|
||||
Overall string `json:"overall"`
|
||||
}
|
||||
|
||||
// loadStates returns the latest snapshot from service_state joined with the
|
||||
// declared service metadata from registry.db (already merged in memory by the
|
||||
// caller through the targetsCache).
|
||||
func loadStates(opsDB *sql.DB, targets []Target, selfPC string) ([]StateRow, error) {
|
||||
rows := make([]StateRow, 0, len(targets))
|
||||
for _, t := range targets {
|
||||
r := StateRow{
|
||||
AppID: t.AppID,
|
||||
AppName: t.Name,
|
||||
PCID: t.PCID,
|
||||
IsSelf: t.PCID == selfPC,
|
||||
Runtime: t.Runtime,
|
||||
Port: t.Port,
|
||||
HealthEndpoint: t.HealthEndpoint,
|
||||
SystemdUnit: t.SystemdUnit,
|
||||
SystemdScope: t.SystemdScope,
|
||||
Overall: "unknown",
|
||||
}
|
||||
var listening int
|
||||
err := opsDB.QueryRow(`
|
||||
SELECT systemd_state, port_listening, http_status, http_latency_ms,
|
||||
last_check_ts, last_change_ts, last_error, overall
|
||||
FROM service_state WHERE app_id = ? AND pc_id = ?
|
||||
`, t.AppID, t.PCID).Scan(
|
||||
&r.SystemdState, &listening, &r.HTTPStatus, &r.HTTPLatencyMs,
|
||||
&r.LastCheckTS, &r.LastChangeTS, &r.LastError, &r.Overall,
|
||||
)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return nil, err
|
||||
}
|
||||
r.PortListening = listening != 0
|
||||
rows = append(rows, r)
|
||||
}
|
||||
return rows, nil
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
module services_api
|
||||
|
||||
go 1.25.0
|
||||
|
||||
require fn-registry v0.0.0
|
||||
|
||||
require (
|
||||
github.com/ClickHouse/ch-go v0.71.0 // indirect
|
||||
github.com/ClickHouse/clickhouse-go/v2 v2.44.0 // indirect
|
||||
github.com/andybalholm/brotli v1.2.0 // indirect
|
||||
github.com/apache/arrow-go/v18 v18.1.0 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/go-faster/city v1.0.1 // indirect
|
||||
github.com/go-faster/errors v0.7.1 // indirect
|
||||
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
|
||||
github.com/goccy/go-json v0.10.5 // indirect
|
||||
github.com/google/flatbuffers v25.1.24+incompatible // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/jackc/pgpassfile v1.0.0 // indirect
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
|
||||
github.com/jackc/pgx/v5 v5.9.1 // indirect
|
||||
github.com/jackc/puddle/v2 v2.2.2 // indirect
|
||||
github.com/klauspost/compress v1.18.3 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.2.9 // indirect
|
||||
github.com/marcboeker/go-duckdb v1.8.5 // indirect
|
||||
github.com/mattn/go-sqlite3 v1.14.37
|
||||
github.com/paulmach/orb v0.12.0 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.25 // indirect
|
||||
github.com/segmentio/asm v1.2.1 // indirect
|
||||
github.com/shopspring/decimal v1.4.0 // indirect
|
||||
github.com/zeebo/xxh3 v1.0.2 // indirect
|
||||
go.opentelemetry.io/otel v1.41.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.41.0 // indirect
|
||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||
golang.org/x/crypto v0.50.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c // indirect
|
||||
golang.org/x/mod v0.34.0 // indirect
|
||||
golang.org/x/sync v0.20.0 // indirect
|
||||
golang.org/x/sys v0.43.0 // indirect
|
||||
golang.org/x/telemetry v0.0.0-20260311193753-579e4da9a98c // indirect
|
||||
golang.org/x/text v0.36.0 // indirect
|
||||
golang.org/x/tools v0.43.0 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
nhooyr.io/websocket v1.8.17 // indirect
|
||||
)
|
||||
|
||||
replace fn-registry => ../../
|
||||
@@ -0,0 +1,174 @@
|
||||
github.com/ClickHouse/ch-go v0.71.0 h1:bUdZ/EZj/LcVHsMqaRUP2holqygrPWQKeMjc6nZoyRM=
|
||||
github.com/ClickHouse/ch-go v0.71.0/go.mod h1:NwbNc+7jaqfY58dmdDUbG4Jl22vThgx1cYjBw0vtgXw=
|
||||
github.com/ClickHouse/clickhouse-go/v2 v2.44.0 h1:9pxs5pRwIvhni5BDRPn/n5A8DeUod5TnBaeulFBX8EQ=
|
||||
github.com/ClickHouse/clickhouse-go/v2 v2.44.0/go.mod h1:giJfUVlMkcfUEPVfRpt51zZaGEx9i17gCos8gBl392c=
|
||||
github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
|
||||
github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
|
||||
github.com/apache/arrow-go/v18 v18.1.0 h1:agLwJUiVuwXZdwPYVrlITfx7bndULJ/dggbnLFgDp/Y=
|
||||
github.com/apache/arrow-go/v18 v18.1.0/go.mod h1:tigU/sIgKNXaesf5d7Y95jBBKS5KsxTqYBKXFsvKzo0=
|
||||
github.com/apache/thrift v0.21.0 h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE=
|
||||
github.com/apache/thrift v0.21.0/go.mod h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-faster/city v1.0.1 h1:4WAxSZ3V2Ws4QRDrscLEDcibJY8uf41H6AhXDrNDcGw=
|
||||
github.com/go-faster/city v1.0.1/go.mod h1:jKcUJId49qdW3L1qKHH/3wPeUstCVpVSXTM6vO3VcTw=
|
||||
github.com/go-faster/errors v0.7.1 h1:MkJTnDoEdi9pDabt1dpWf7AA8/BaSYZqibYyhZ20AYg=
|
||||
github.com/go-faster/errors v0.7.1/go.mod h1:5ySTjWFiphBs07IKuiL69nxdfd5+fzh1u7FPGZP2quo=
|
||||
github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss=
|
||||
github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
|
||||
github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
|
||||
github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
|
||||
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
||||
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
||||
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/flatbuffers v25.1.24+incompatible h1:4wPqL3K7GzBd1CwyhSd3usxLKOaJN/AC6puCca6Jm7o=
|
||||
github.com/google/flatbuffers v25.1.24+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
|
||||
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
|
||||
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
|
||||
github.com/jackc/pgx/v5 v5.9.1 h1:uwrxJXBnx76nyISkhr33kQLlUqjv7et7b9FjCen/tdc=
|
||||
github.com/jackc/pgx/v5 v5.9.1/go.mod h1:mal1tBGAFfLHvZzaYh77YS/eC6IX9OWbRV1QIIM0Jn4=
|
||||
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
|
||||
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
|
||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4=
|
||||
github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE=
|
||||
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
|
||||
github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw=
|
||||
github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
||||
github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY=
|
||||
github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/marcboeker/go-duckdb v1.8.5 h1:tkYp+TANippy0DaIOP5OEfBEwbUINqiFqgwMQ44jME0=
|
||||
github.com/marcboeker/go-duckdb v1.8.5/go.mod h1:6mK7+WQE4P4u5AFLvVBmhFxY5fvhymFptghgJX6B+/8=
|
||||
github.com/mattn/go-sqlite3 v1.14.37 h1:3DOZp4cXis1cUIpCfXLtmlGolNLp2VEqhiB/PARNBIg=
|
||||
github.com/mattn/go-sqlite3 v1.14.37/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
|
||||
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY=
|
||||
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI=
|
||||
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE=
|
||||
github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
|
||||
github.com/paulmach/orb v0.12.0 h1:z+zOwjmG3MyEEqzv92UN49Lg1JFYx0L9GpGKNVDKk1s=
|
||||
github.com/paulmach/orb v0.12.0/go.mod h1:5mULz1xQfs3bmQm63QEJA6lNGujuRafwA5S/EnuLaLU=
|
||||
github.com/paulmach/protoscan v0.2.1/go.mod h1:SpcSwydNLrxUGSDvXvO0P7g7AuhJ7lcKfDlhJCDw2gY=
|
||||
github.com/pierrec/lz4/v4 v4.1.25 h1:kocOqRffaIbU5djlIBr7Wh+cx82C0vtFb0fOurZHqD0=
|
||||
github.com/pierrec/lz4/v4 v4.1.25/go.mod h1:EoQMVJgeeEOMsCqCzqFm2O0cJvljX2nGZjcRIPL34O4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||
github.com/segmentio/asm v1.2.1 h1:DTNbBqs57ioxAD4PrArqftgypG4/qNpXoJx8TVXxPR0=
|
||||
github.com/segmentio/asm v1.2.1/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
|
||||
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
||||
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
|
||||
github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
|
||||
github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g=
|
||||
github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8=
|
||||
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
|
||||
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
|
||||
github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
|
||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
|
||||
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
|
||||
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
|
||||
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
|
||||
go.mongodb.org/mongo-driver v1.11.4/go.mod h1:PTSz5yu21bkT/wXpkS7WR5f0ddqw5quethTUn9WM+2g=
|
||||
go.opentelemetry.io/otel v1.41.0 h1:YlEwVsGAlCvczDILpUXpIpPSL/VPugt7zHThEMLce1c=
|
||||
go.opentelemetry.io/otel v1.41.0/go.mod h1:Yt4UwgEKeT05QbLwbyHXEwhnjxNO6D8L5PQP51/46dE=
|
||||
go.opentelemetry.io/otel/trace v1.41.0 h1:Vbk2co6bhj8L59ZJ6/xFTskY+tGAbOnCtQGVVa9TIN0=
|
||||
go.opentelemetry.io/otel/trace v1.41.0/go.mod h1:U1NU4ULCoxeDKc09yCWdWe+3QoyweJcISEVa1RBzOis=
|
||||
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
|
||||
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
||||
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
|
||||
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
|
||||
golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c h1:KL/ZBHXgKGVmuZBZ01Lt57yE5ws8ZPSkkihmEyq7FXc=
|
||||
golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU=
|
||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.34.0 h1:xIHgNUUnW6sYkcM5Jleh05DvLOtwc6RitGHbDk4akRI=
|
||||
golang.org/x/mod v0.34.0/go.mod h1:ykgH52iCZe79kzLLMhyCUzhMci+nQj+0XkbXpNYtVjY=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
|
||||
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
|
||||
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||
golang.org/x/telemetry v0.0.0-20260311193753-579e4da9a98c h1:6a8FdnNk6bTXBjR4AGKFgUKuo+7GnR3FX5L7CbveeZc=
|
||||
golang.org/x/telemetry v0.0.0-20260311193753-579e4da9a98c/go.mod h1:TpUTTEp9frx7rTdLpC9gFG9kdI7zVLFTFFlqaH2Cncw=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
|
||||
golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
golang.org/x/tools v0.43.0 h1:12BdW9CeB3Z+J/I/wj34VMl8X+fEXBxVR90JeMX5E7s=
|
||||
golang.org/x/tools v0.43.0/go.mod h1:uHkMso649BX2cZK6+RpuIPXS3ho2hZo4FVwfoy1vIk0=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=
|
||||
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
||||
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
|
||||
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
|
||||
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
||||
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
nhooyr.io/websocket v1.8.17 h1:KEVeLJkUywCKVsnLIDlD/5gtayKp8VoCkksHCGGfT9Y=
|
||||
nhooyr.io/websocket v1.8.17/go.mod h1:rN9OFWIUwuxg4fR5tELlYC04bXYowCP9GX47ivo2l+c=
|
||||
@@ -0,0 +1,362 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
type Server struct {
|
||||
registryRoot string
|
||||
selfPC string
|
||||
opsDB *sql.DB
|
||||
|
||||
mu sync.RWMutex
|
||||
targets []Target
|
||||
}
|
||||
|
||||
func main() {
|
||||
addr := flag.String("bind", "127.0.0.1:8485", "HTTP bind address")
|
||||
registry := flag.String("registry", defaultRegistryRoot(), "fn_registry root")
|
||||
opsPath := flag.String("db", "", "operations.db path (default: <app_dir>/operations.db)")
|
||||
interval := flag.Duration("interval", 15*time.Second, "check loop interval")
|
||||
once := flag.Bool("once", false, "run one check cycle and exit (smoke test)")
|
||||
flag.Parse()
|
||||
|
||||
if *opsPath == "" {
|
||||
exe, _ := os.Executable()
|
||||
*opsPath = filepath.Join(filepath.Dir(exe), "operations.db")
|
||||
}
|
||||
|
||||
selfPC := readSelfPC()
|
||||
log.Printf("services_api: selfPC=%q registry=%q ops=%q", selfPC, *registry, *opsPath)
|
||||
|
||||
db, err := openOpsDB(*opsPath)
|
||||
if err != nil {
|
||||
log.Fatalf("open ops db: %v", err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
srv := &Server{registryRoot: *registry, selfPC: selfPC, opsDB: db}
|
||||
if err := srv.refreshTargets(); err != nil {
|
||||
log.Fatalf("load targets: %v", err)
|
||||
}
|
||||
|
||||
if *once {
|
||||
srv.runOneCycle()
|
||||
log.Printf("once: completed %d checks", len(srv.targets))
|
||||
return
|
||||
}
|
||||
|
||||
// Background loop.
|
||||
go srv.checkLoop(*interval)
|
||||
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/api/health", srv.handleHealth)
|
||||
mux.HandleFunc("/api/services", srv.handleServices)
|
||||
mux.HandleFunc("/api/check", srv.handleForceCheck)
|
||||
mux.HandleFunc("/api/pcs", srv.handlePCs)
|
||||
mux.HandleFunc("/api/action/", srv.handleAction)
|
||||
|
||||
log.Printf("services_api listening on %s", *addr)
|
||||
if err := http.ListenAndServe(*addr, mux); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func defaultRegistryRoot() string {
|
||||
if v := os.Getenv("FN_REGISTRY_ROOT"); v != "" {
|
||||
return v
|
||||
}
|
||||
exe, _ := os.Executable()
|
||||
// Walk up looking for registry.db.
|
||||
dir := filepath.Dir(exe)
|
||||
for i := 0; i < 6; i++ {
|
||||
if _, err := os.Stat(filepath.Join(dir, "registry.db")); err == nil {
|
||||
return dir
|
||||
}
|
||||
parent := filepath.Dir(dir)
|
||||
if parent == dir {
|
||||
break
|
||||
}
|
||||
dir = parent
|
||||
}
|
||||
return "/home/lucas/fn_registry"
|
||||
}
|
||||
|
||||
func readSelfPC() string {
|
||||
home, _ := os.UserHomeDir()
|
||||
b, err := os.ReadFile(filepath.Join(home, ".fn_pc"))
|
||||
if err != nil {
|
||||
return "unknown"
|
||||
}
|
||||
return strings.TrimSpace(string(b))
|
||||
}
|
||||
|
||||
func (s *Server) refreshTargets() error {
|
||||
ts, err := loadTargets(s.registryRoot)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.mu.Lock()
|
||||
s.targets = ts
|
||||
s.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) runOneCycle() {
|
||||
s.mu.RLock()
|
||||
targets := append([]Target(nil), s.targets...)
|
||||
s.mu.RUnlock()
|
||||
|
||||
// Parallel probes (each remote SSH call can stall up to ~10s).
|
||||
// Cap concurrency to avoid overwhelming SSH agent / sockets.
|
||||
const maxParallel = 8
|
||||
sem := make(chan struct{}, maxParallel)
|
||||
results := make(chan ServiceCheck, len(targets))
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for _, t := range targets {
|
||||
wg.Add(1)
|
||||
sem <- struct{}{}
|
||||
go func(tt Target) {
|
||||
defer wg.Done()
|
||||
defer func() { <-sem }()
|
||||
done := make(chan ServiceCheck, 1)
|
||||
go func() { done <- probeTarget(tt, s.selfPC) }()
|
||||
select {
|
||||
case c := <-done:
|
||||
results <- c
|
||||
case <-time.After(20 * time.Second):
|
||||
results <- ServiceCheck{
|
||||
AppID: tt.AppID,
|
||||
PCID: tt.PCID,
|
||||
SystemdState: "timeout",
|
||||
Error: "probe exceeded 20s",
|
||||
Overall: "no-route",
|
||||
}
|
||||
}
|
||||
}(t)
|
||||
}
|
||||
|
||||
go func() { wg.Wait(); close(results) }()
|
||||
|
||||
for c := range results {
|
||||
if err := upsertState(s.opsDB, c); err != nil {
|
||||
log.Printf("upsert %s/%s: %v", c.AppID, c.PCID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) checkLoop(interval time.Duration) {
|
||||
tick := time.NewTicker(interval)
|
||||
defer tick.Stop()
|
||||
// First cycle immediately, then on interval.
|
||||
s.runOneCycle()
|
||||
refreshEvery := 5 * time.Minute
|
||||
lastRefresh := time.Now()
|
||||
for {
|
||||
<-tick.C
|
||||
if time.Since(lastRefresh) > refreshEvery {
|
||||
if err := s.refreshTargets(); err != nil {
|
||||
log.Printf("refresh targets: %v", err)
|
||||
}
|
||||
lastRefresh = time.Now()
|
||||
}
|
||||
s.runOneCycle()
|
||||
}
|
||||
}
|
||||
|
||||
// HTTP handlers ------------------------------------------------------------
|
||||
|
||||
func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, http.StatusOK, map[string]any{"status": "ok", "self_pc": s.selfPC})
|
||||
}
|
||||
|
||||
func (s *Server) handleServices(w http.ResponseWriter, r *http.Request) {
|
||||
s.mu.RLock()
|
||||
targets := append([]Target(nil), s.targets...)
|
||||
s.mu.RUnlock()
|
||||
|
||||
rows, err := loadStates(s.opsDB, targets, s.selfPC)
|
||||
if err != nil {
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]any{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
// Compute reachable per PC: a PC is unreachable when all its services
|
||||
// returned no-route (we never got systemd state) AND it is not selfPC.
|
||||
noRouteByPC := map[string]int{}
|
||||
totalByPC := map[string]int{}
|
||||
for _, r := range rows {
|
||||
totalByPC[r.PCID]++
|
||||
if r.Overall == "no-route" {
|
||||
noRouteByPC[r.PCID]++
|
||||
}
|
||||
}
|
||||
reachableByPC := map[string]bool{}
|
||||
for pc, total := range totalByPC {
|
||||
if pc == s.selfPC {
|
||||
reachableByPC[pc] = true
|
||||
continue
|
||||
}
|
||||
reachableByPC[pc] = noRouteByPC[pc] < total
|
||||
}
|
||||
for i := range rows {
|
||||
rows[i].Reachable = reachableByPC[rows[i].PCID]
|
||||
}
|
||||
|
||||
writeJSON(w, http.StatusOK, map[string]any{
|
||||
"self_pc": s.selfPC,
|
||||
"services": rows,
|
||||
"ts": time.Now().Unix(),
|
||||
"pcs": pcsSummary(reachableByPC, totalByPC, s.selfPC),
|
||||
})
|
||||
}
|
||||
|
||||
// pcsSummary builds the array used in /api/services and /api/pcs.
|
||||
func pcsSummary(reachable map[string]bool, total map[string]int, selfPC string) []map[string]any {
|
||||
out := make([]map[string]any, 0, len(total))
|
||||
for pc, n := range total {
|
||||
out = append(out, map[string]any{
|
||||
"pc_id": pc,
|
||||
"is_self": pc == selfPC,
|
||||
"reachable": reachable[pc],
|
||||
"services_count": n,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (s *Server) handleForceCheck(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second)
|
||||
defer cancel()
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
s.runOneCycle()
|
||||
close(done)
|
||||
}()
|
||||
select {
|
||||
case <-done:
|
||||
s.handleServices(w, r)
|
||||
case <-ctx.Done():
|
||||
writeJSON(w, http.StatusGatewayTimeout, map[string]any{"error": "check loop timed out"})
|
||||
}
|
||||
}
|
||||
|
||||
// handleAction handles POST /api/action/{app_id}/{pc_id}/restart.
|
||||
// Currently the only supported action is "restart"; we keep the URL
|
||||
// shape general for stop/start in v2.
|
||||
func (s *Server) handleAction(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
// URL: /api/action/{app_id}/{pc_id}/{verb}
|
||||
path := strings.TrimPrefix(r.URL.Path, "/api/action/")
|
||||
parts := strings.Split(path, "/")
|
||||
if len(parts) != 3 {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]any{
|
||||
"error": "expected /api/action/{app_id}/{pc_id}/{verb}",
|
||||
})
|
||||
return
|
||||
}
|
||||
appID, pcID, verb := parts[0], parts[1], parts[2]
|
||||
if verb != "restart" {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]any{
|
||||
"error": "unsupported verb (allowed: restart)",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// Lookup target.
|
||||
s.mu.RLock()
|
||||
var t *Target
|
||||
for i := range s.targets {
|
||||
if s.targets[i].AppID == appID && s.targets[i].PCID == pcID {
|
||||
t = &s.targets[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
s.mu.RUnlock()
|
||||
if t == nil {
|
||||
writeJSON(w, http.StatusNotFound, map[string]any{
|
||||
"error": "target not found",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
stdout, stderr, code, err := restartTarget(*t, s.selfPC)
|
||||
if err != nil && code == -1 {
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]any{
|
||||
"app_id": appID,
|
||||
"pc_id": pcID,
|
||||
"verb": verb,
|
||||
"error": err.Error(),
|
||||
"stderr": stderr,
|
||||
"exit_code": code,
|
||||
})
|
||||
return
|
||||
}
|
||||
status := http.StatusOK
|
||||
if code != 0 {
|
||||
status = http.StatusBadGateway
|
||||
}
|
||||
// Trigger a check cycle so UI refresh shows the new state immediately.
|
||||
go s.runOneCycle()
|
||||
|
||||
writeJSON(w, status, map[string]any{
|
||||
"app_id": appID,
|
||||
"pc_id": pcID,
|
||||
"verb": verb,
|
||||
"exit_code": code,
|
||||
"stdout": strings.TrimSpace(stdout),
|
||||
"stderr": strings.TrimSpace(stderr),
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Server) handlePCs(w http.ResponseWriter, r *http.Request) {
|
||||
s.mu.RLock()
|
||||
targets := append([]Target(nil), s.targets...)
|
||||
s.mu.RUnlock()
|
||||
|
||||
counts := map[string]int{}
|
||||
for _, t := range targets {
|
||||
counts[t.PCID]++
|
||||
}
|
||||
out := make([]map[string]any, 0, len(counts))
|
||||
for pc, n := range counts {
|
||||
out = append(out, map[string]any{
|
||||
"pc_id": pc,
|
||||
"is_self": pc == s.selfPC,
|
||||
"services_count": n,
|
||||
})
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]any{"pcs": out, "self_pc": s.selfPC})
|
||||
}
|
||||
|
||||
func writeJSON(w http.ResponseWriter, code int, v any) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(code)
|
||||
enc := json.NewEncoder(w)
|
||||
enc.SetIndent("", " ")
|
||||
_ = enc.Encode(v)
|
||||
}
|
||||
|
||||
// stub to silence unused import
|
||||
var _ = fmt.Sprintf
|
||||
@@ -0,0 +1,29 @@
|
||||
-- 001: schema inicial de services_api operations.db (issue 0106)
|
||||
-- Estado actual + transitions append-only.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS service_state (
|
||||
app_id TEXT NOT NULL,
|
||||
pc_id TEXT NOT NULL,
|
||||
systemd_state TEXT NOT NULL DEFAULT '', -- active|inactive|failed|unknown|no-route
|
||||
port_listening INTEGER NOT NULL DEFAULT 0,
|
||||
http_status INTEGER NOT NULL DEFAULT 0,
|
||||
http_latency_ms INTEGER NOT NULL DEFAULT 0,
|
||||
last_check_ts INTEGER NOT NULL DEFAULT 0,
|
||||
last_change_ts INTEGER NOT NULL DEFAULT 0,
|
||||
last_error TEXT NOT NULL DEFAULT '',
|
||||
overall TEXT NOT NULL DEFAULT 'unknown', -- ok|degraded|down|no-route|unknown
|
||||
PRIMARY KEY (app_id, pc_id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS service_transition (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
ts INTEGER NOT NULL,
|
||||
app_id TEXT NOT NULL,
|
||||
pc_id TEXT NOT NULL,
|
||||
from_state TEXT NOT NULL,
|
||||
to_state TEXT NOT NULL,
|
||||
detail TEXT NOT NULL DEFAULT ''
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_transition_app_pc_ts
|
||||
ON service_transition(app_id, pc_id, ts DESC);
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Executable
BIN
Binary file not shown.
Reference in New Issue
Block a user