commit 486d9e4d99bc0e09e9f819886c639cec73292dd2 Author: fn-registry agent Date: Tue May 19 00:31:23 2026 +0200 chore: sync from fn-registry agent diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..386d283 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +registry.db diff --git a/app.md b/app.md new file mode 100644 index 0000000..34999bc --- /dev/null +++ b/app.md @@ -0,0 +1,94 @@ +--- +name: services_api +lang: go +domain: infra +version: 0.1.0 +description: "Backend HTTP que monitoriza apps con tag 'service' en el PC local y en PCs remotos via SSH. Loop de checks cada 15s, persistencia en operations.db. Frontend dedicado: app C++ services_monitor. Issue 0106." +tags: [service, monitoring, sqlite, ssh, systemd, http] +uses_functions: + - sqlite_apply_versioned_migrations_go_infra + - ssh_exec_go_infra +uses_types: + - ssh_conn_go_infra +framework: "net/http" +entry_point: "main.go" +dir_path: "apps/services_api" +service: + port: 8485 + health_endpoint: /api/health + health_timeout_s: 3 + systemd_unit: services_api.service + systemd_scope: user + restart_policy: always + runtime: systemd-user + pc_targets: + - aurgi-pc + - home-wsl + is_local_only: false +--- + +# services_api + +## Que hace + +Monitor cross-PC de los services del registry (issue 0106). + +1. Lee `registry.db` (read-only) → apps con `tag: service` + `service_targets`. +2. Loop cada 15s: por cada (app, pc) chequea systemd + port listening + HTTP health. +3. Local (pc == self): `systemctl is-active`, TCP dial, `http.Client`. +4. Remoto: mismo set de comandos a traves de `ssh_exec_go_infra`. Si no hay ruta SSH → `no-route`. +5. Persistencia en `operations.db`: `service_state` (estado actual) + `service_transition` (cambios). + +## Cuando usarla + +- Saber de un vistazo que services estan vivos en cada PC. +- Detectar caidas silenciosas (ej. `sqlite_api` murio 20h sin alerta el 2026-05-17). +- Auditar cobertura por PC antes de un deploy. + +## Lanzar + +```bash +cd apps/services_api +CGO_ENABLED=1 go build -tags fts5 -o services_api . +./services_api --bind 127.0.0.1:8485 +# o ./services_api --once para un solo ciclo (smoke) +``` + +Abrir `http://127.0.0.1:8485` en el navegador. + +## Endpoints + +| Ruta | Que devuelve | +|---|---| +| `GET /api/health` | `{"status":"ok","self_pc":"..."}` | +| `GET /api/services` | Snapshot completo (`services[]`) + `self_pc` + `ts` | +| `POST /api/check` | Fuerza un ciclo de checks; bloquea hasta completar | +| `GET /api/pcs` | Lista PCs con `services_count` | + +## Gotchas + +- **PCs remotos sin entrada en `~/.ssh/config`** quedan en `no-route`/`unknown`. Anadir alias antes de esperar status real (ej. `aurgi-pc` no esta en config hoy). +- **Solo lectura de registry.db**; no escribe en el. Las migraciones son sobre la propia operations.db de la app (`apps/services_api/operations.db`, tabla `service_state` + `service_transition`). +- **`overall`** se computa por runtime: + - `systemd-*` / `stdio`: requiere `systemctl is-active == active`. Si hay puerto, ademas TCP + HTTP 2xx/3xx. + - `docker-compose` / `manual`: solo puerto + HTTP. +- **Reload de targets** cada 5 min (background) sin reiniciar el proceso. Cambios en `app.md` requieren `./fn index` primero. +- **No expone tools mutadoras** en v1 (start/stop/restart). Solo alerta. Issue 0106 marca auto-fix detras de feature flag para v2. + +## Validacion + +```bash +./services_api --once && \ + sqlite3 operations.db \ + "SELECT app_id, pc_id, overall FROM service_state ORDER BY app_id, pc_id;" +``` + + +## Capability growth log + +Una linea por bump SemVer. Bump-type segun `.claude/commands/version.md`: +- `major`: breaking observable (CLI args, schema BBDD propia, formato wire). +- `minor`: feature aditiva (nuevo panel, endpoint, opcion). +- `patch`: bugfix sin cambio observable. + +- v0.1.0 (2026-05-18) — baseline. diff --git a/check.go b/check.go new file mode 100644 index 0000000..be9fd9d --- /dev/null +++ b/check.go @@ -0,0 +1,532 @@ +package main + +import ( + "context" + "database/sql" + "fmt" + "net" + "net/http" + "os/exec" + "strconv" + "strings" + "time" + + "fn-registry/functions/infra" +) + +// Target is one (app, pc) combination derived from registry.db apps tagged +// service + service_targets. +type Target struct { + AppID string + Name string + Runtime string + Port int + HealthEndpoint string + HealthTimeoutS int + SystemdUnit string + SystemdScope string + IsLocalOnly bool + PCID string +} + +// ServiceCheck is the result of one probe of a target. +type ServiceCheck struct { + AppID string + PCID string + SystemdState string + PortListening bool + HTTPStatus int + HTTPLatencyMs int + Error string + Overall string +} + +// loadTargets queries registry.db for the cross product of services x pc_targets. +func loadTargets(registryRoot string) ([]Target, error) { + dbPath := registryRoot + "/registry.db" + db, err := sql.Open("sqlite3", dbPath+"?mode=ro&_journal_mode=WAL") + if err != nil { + return nil, fmt.Errorf("open registry.db: %w", err) + } + defer db.Close() + + rows, err := db.Query(` + SELECT a.id, a.name, + COALESCE(a.service_runtime,''), + COALESCE(a.service_port,0), + COALESCE(a.service_health_endpoint,''), + COALESCE(a.service_health_timeout_s,3), + COALESCE(a.service_systemd_unit,''), + COALESCE(a.service_systemd_scope,''), + COALESCE(a.service_is_local_only,0), + t.pc_id + FROM apps a + JOIN service_targets t ON t.app_id = a.id + WHERE a.tags LIKE '%service%' + ORDER BY a.name, t.pc_id + `) + if err != nil { + return nil, fmt.Errorf("query targets: %w", err) + } + defer rows.Close() + + var out []Target + for rows.Next() { + var t Target + var localOnly int + if err := rows.Scan( + &t.AppID, &t.Name, &t.Runtime, &t.Port, &t.HealthEndpoint, &t.HealthTimeoutS, + &t.SystemdUnit, &t.SystemdScope, &localOnly, &t.PCID, + ); err != nil { + return nil, err + } + t.IsLocalOnly = localOnly != 0 + if t.HealthTimeoutS <= 0 { + t.HealthTimeoutS = 3 + } + out = append(out, t) + } + return out, rows.Err() +} + +// probeTarget probes one target and returns a ServiceCheck. +func probeTarget(t Target, selfPC string) ServiceCheck { + c := ServiceCheck{AppID: t.AppID, PCID: t.PCID} + + // Local check only when the target lives on selfPC. Remote PCs always go + // over SSH; if SSH has no route the probe returns "no-route" — that is + // exactly what we want to surface "PC unreachable" upstream. + if t.PCID == selfPC { + probeLocal(&c, t) + } else { + probeRemote(&c, t) + } + c.Overall = computeOverall(t, c) + return c +} + +func probeLocal(c *ServiceCheck, t Target) { + // systemd state. + if t.SystemdUnit != "" { + c.SystemdState = systemctlIsActiveLocal(t.SystemdUnit, t.SystemdScope) + } else { + c.SystemdState = "n/a" + } + + // Port listening (TCP localhost). + if t.Port > 0 { + c.PortListening = portListeningLocal(t.Port, time.Duration(t.HealthTimeoutS)*time.Second) + } + + // HTTP health. + if t.Port > 0 && t.HealthEndpoint != "" { + status, latency, err := httpProbe("127.0.0.1", t.Port, t.HealthEndpoint, time.Duration(t.HealthTimeoutS)*time.Second) + c.HTTPStatus = status + c.HTTPLatencyMs = latency + if err != nil { + c.Error = err.Error() + } + } +} + +func probeRemote(c *ServiceCheck, t Target) { + conn := infra.SSHConn{Host: t.PCID} + + // docker-compose runtime: state via `docker compose ls`, no systemctl. + if t.Runtime == "docker-compose" { + probeDockerComposeRemote(c, t, conn) + return + } + + // systemd state via ssh. + if t.SystemdUnit != "" { + cmd := fmt.Sprintf("systemctl is-active %s", shellEscape(t.SystemdUnit)) + if t.SystemdScope == "user" { + cmd = "systemctl --user is-active " + shellEscape(t.SystemdUnit) + } + stdout, stderr, code, err := infra.SSHExec(conn, cmd) + // SSH transport failure (ssh exit 255 = resolve/connect refused/auth) + // or local exec error → mark unreachable. systemctl exit codes 3 ("inactive"), + // 4 ("unit not found") are still semantic answers from a reachable host. + if err != nil || code == 255 || code < 0 { + c.SystemdState = "no-route" + detail := strings.TrimSpace(stderr) + if err != nil { + detail = err.Error() + } + c.Error = detail + return + } + // systemctl returns "inactive" exit 3 when unit is stopped OR missing. + // Distinguish via stderr (unit not found). + stderrLower := strings.ToLower(stderr) + if strings.Contains(stderrLower, "could not be found") || + strings.Contains(stderrLower, "not-found") || + strings.Contains(stderrLower, "not found") { + c.SystemdState = "not-installed" + return + } + c.SystemdState = strings.TrimSpace(stdout) + if c.SystemdState == "" { + c.SystemdState = "unknown" + } + } else { + c.SystemdState = "n/a" + } + + if t.Port > 0 { + // Port listening via ss on remote. + cmd := fmt.Sprintf("ss -tln '( sport = :%d )' | tail -n +2 | head -n1", t.Port) + stdout, _, _, err := infra.SSHExec(conn, cmd) + if err == nil && strings.TrimSpace(stdout) != "" { + c.PortListening = true + } + + // HTTP probe via curl on remote. + if t.HealthEndpoint != "" { + url := fmt.Sprintf("http://127.0.0.1:%d%s", t.Port, t.HealthEndpoint) + fmtCmd := `curl -s -o /dev/null -w "%{http_code} %{time_total}" -m ` + + strconv.Itoa(t.HealthTimeoutS) + " " + shellEscape(url) + stdout, stderr, _, err := infra.SSHExec(conn, fmtCmd) + if err != nil { + c.Error = strings.TrimSpace(stderr) + return + } + parts := strings.Fields(strings.TrimSpace(stdout)) + if len(parts) >= 1 { + if code, perr := strconv.Atoi(parts[0]); perr == nil { + c.HTTPStatus = code + } + } + if len(parts) >= 2 { + if secs, perr := strconv.ParseFloat(parts[1], 64); perr == nil { + c.HTTPLatencyMs = int(secs * 1000) + } + } + } + } +} + +func computeOverall(t Target, c ServiceCheck) string { + if strings.HasPrefix(c.SystemdState, "no-route") { + return "no-route" + } + if c.SystemdState == "not-installed" { + return "not-installed" + } + + // When the probe returned a systemd is-active style answer, route through + // the systemd branch regardless of the declared runtime. This covers the + // case where an app advertises `runtime: docker-compose` (because it lives + // on the primary VPS) but also runs as a bare systemd unit on a developer + // PC. The PC-local probe always asks systemctl; trust its output. + switch c.SystemdState { + case "active", "inactive", "failed", "activating", "deactivating", "reloading", "unknown", "n/a": + // fallthrough to systemd-style aggregation below + default: + if t.Runtime == "docker-compose" { + if c.SystemdState == "" { + return "unknown" + } + ran, exited, _ := parseComposeCounts(c.SystemdState) + if ran == 0 { + return "down" + } + if exited > 0 { + return "degraded" + } + if t.Port > 0 && t.HealthEndpoint != "" && + (c.HTTPStatus > 0 && (c.HTTPStatus < 200 || c.HTTPStatus >= 400)) { + return "degraded" + } + return "ok" + } + } + + // Systemd-runtime apps. + if strings.HasPrefix(t.Runtime, "systemd-") || t.Runtime == "stdio" || c.SystemdState == "active" || c.SystemdState == "inactive" || c.SystemdState == "failed" { + if c.SystemdState != "active" { + return "down" + } + if t.Port > 0 { + if !c.PortListening { + return "degraded" + } + if t.HealthEndpoint != "" && (c.HTTPStatus < 200 || c.HTTPStatus >= 400) { + return "degraded" + } + } + return "ok" + } + + // manual / unknown runtime: lean on port + http only. + if t.Port > 0 { + if !c.PortListening { + return "down" + } + if t.HealthEndpoint != "" && (c.HTTPStatus < 200 || c.HTTPStatus >= 400) { + return "degraded" + } + return "ok" + } + + return "unknown" +} + +// parseComposeCounts parses a docker-compose `ls` status string like +// "running(10)", "running(8) | exited(2)" and returns (running, exited, total). +// Tolerant: unknown tokens map to 0. +func parseComposeCounts(s string) (running, exited, total int) { + parts := strings.Split(s, "|") + for _, p := range parts { + p = strings.TrimSpace(p) + // Format: kind(N) + open := strings.Index(p, "(") + closeIdx := strings.Index(p, ")") + if open == -1 || closeIdx == -1 || closeIdx < open { + continue + } + kind := strings.TrimSpace(p[:open]) + n, err := strconv.Atoi(strings.TrimSpace(p[open+1 : closeIdx])) + if err != nil { + continue + } + switch kind { + case "running": + running += n + case "exited", "stopped", "dead": + exited += n + } + total += n + } + return +} + +// systemctlIsActiveLocal runs `systemctl [--user] is-active ` and returns +// a normalized state. Distinguishes "not-installed" (unit file absent) from +// "inactive" (unit present but stopped). Both are unhealthy but mean different +// things for the human: not-installed = forget systemctl, deploy the unit. +func systemctlIsActiveLocal(unit, scope string) string { + args := []string{"is-active", unit} + if scope == "user" { + args = append([]string{"--user"}, args...) + } + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + cmd := exec.CommandContext(ctx, "systemctl", args...) + var so, se strings.Builder + cmd.Stdout = &so + cmd.Stderr = &se + _ = cmd.Run() + state := strings.TrimSpace(so.String()) + stderr := strings.ToLower(se.String()) + if strings.Contains(stderr, "could not be found") || + strings.Contains(stderr, "not-found") || + strings.Contains(stderr, "not found") { + return "not-installed" + } + if state == "" { + return "unknown" + } + return state +} + +func portListeningLocal(port int, timeout time.Duration) bool { + conn, err := net.DialTimeout("tcp", fmt.Sprintf("127.0.0.1:%d", port), timeout) + if err != nil { + return false + } + conn.Close() + return true +} + +func httpProbe(host string, port int, path string, timeout time.Duration) (int, int, error) { + client := &http.Client{Timeout: timeout} + url := fmt.Sprintf("http://%s:%d%s", host, port, path) + start := time.Now() + resp, err := client.Get(url) + latencyMs := int(time.Since(start) / time.Millisecond) + if err != nil { + return 0, latencyMs, err + } + defer resp.Body.Close() + return resp.StatusCode, latencyMs, nil +} + +// probeDockerComposeRemote queries `docker compose ls --format json` over SSH +// and maps the running/expected ratio into a synthetic systemd_state-like value +// + port/HTTP probes already declared. +// +// Project name defaults to t.Name (the app's frontmatter `name`). Override +// later via a `docker_compose_project` field if needed. +func probeDockerComposeRemote(c *ServiceCheck, t Target, conn infra.SSHConn) { + project := t.Name + + // `docker compose ls --format json` returns array of {Name, Status, ConfigFiles}. + // Status string examples: "running(10)", "running(8) | exited(2)", "exited(5)". + cmd := "docker compose ls --all --format json" + stdout, stderr, code, err := infra.SSHExec(conn, cmd) + if err != nil || code != 0 { + c.SystemdState = "no-route" + detail := strings.TrimSpace(stderr) + if err != nil { + detail = err.Error() + } + c.Error = detail + return + } + + // Tolerant parse: find the JSON object whose "Name" matches the project. + // Try variants: the app's name, hyphenized, and slashes-stripped. + candidates := []string{project, strings.ReplaceAll(project, "_", "-")} + var status string + var found bool + var matched string + for _, cand := range candidates { + if cand == "" { + continue + } + if s, ok := extractComposeStatus(stdout, cand); ok { + status, found, matched = s, true, cand + break + } + } + if !found { + c.SystemdState = "not-installed" + c.Error = "no docker compose project named " + project + + " (tried " + strings.Join(candidates, ", ") + ")" + return + } + _ = matched // reserved for future telemetry + + c.SystemdState = status + if t.Port > 0 { + probeRemotePort(c, t, conn) + } +} + +// extractComposeStatus parses the JSON array emitted by +// `docker compose ls --format json` and returns the Status field of the entry +// with Name == project. Tolerant to whitespace and field ordering. +func extractComposeStatus(jsonBlob, project string) (string, bool) { + // Naive but reliable: find `"Name":""` and the nearest + // `"Status":"..."` field within the same object. + needle := `"Name":"` + project + `"` + idx := strings.Index(jsonBlob, needle) + if idx == -1 { + // Try alternative: "Name": "project" with space. + needle2 := `"Name": "` + project + `"` + idx = strings.Index(jsonBlob, needle2) + if idx == -1 { + return "", false + } + } + // Bound the search window to the surrounding object. + open := strings.LastIndex(jsonBlob[:idx], "{") + close := strings.Index(jsonBlob[idx:], "}") + if open == -1 || close == -1 { + return "", false + } + obj := jsonBlob[open : idx+close+1] + // Find "Status":"..." + const stKey = `"Status":` + si := strings.Index(obj, stKey) + if si == -1 { + return "", false + } + // Skip whitespace and opening quote. + rest := obj[si+len(stKey):] + rest = strings.TrimLeft(rest, " \t") + if !strings.HasPrefix(rest, `"`) { + return "", false + } + rest = rest[1:] + end := strings.Index(rest, `"`) + if end == -1 { + return "", false + } + return rest[:end], true +} + +// probeRemotePort runs the port + http probes when a docker compose project +// declares a port that should be reachable on the remote host's loopback. +func probeRemotePort(c *ServiceCheck, t Target, conn infra.SSHConn) { + cmd := fmt.Sprintf("ss -tln '( sport = :%d )' | tail -n +2 | head -n1", t.Port) + stdout, _, _, err := infra.SSHExec(conn, cmd) + if err == nil && strings.TrimSpace(stdout) != "" { + c.PortListening = true + } + if t.HealthEndpoint == "" { + return + } + url := fmt.Sprintf("http://127.0.0.1:%d%s", t.Port, t.HealthEndpoint) + fmtCmd := `curl -s -o /dev/null -w "%{http_code} %{time_total}" -m ` + + strconv.Itoa(t.HealthTimeoutS) + " " + shellEscape(url) + stdout, stderr, _, err := infra.SSHExec(conn, fmtCmd) + if err != nil { + c.Error = strings.TrimSpace(stderr) + return + } + parts := strings.Fields(strings.TrimSpace(stdout)) + if len(parts) >= 1 { + if code, perr := strconv.Atoi(parts[0]); perr == nil { + c.HTTPStatus = code + } + } + if len(parts) >= 2 { + if secs, perr := strconv.ParseFloat(parts[1], 64); perr == nil { + c.HTTPLatencyMs = int(secs * 1000) + } + } +} + +// restartTarget runs `systemctl [--user] restart ` locally or via SSH +// depending on whether the target lives on selfPC or a remote PC. +// +// Returns (stdout, stderr, exit_code, transport_error). +// - transport_error != nil when ssh / exec failed to even run (DNS, network). +// In that case exit_code is -1. +// - exit_code != 0 when the command ran but systemctl rejected the request +// (unit not found, permission denied, etc.). transport_error is nil. +func restartTarget(t Target, selfPC string) (string, string, int, error) { + if t.SystemdUnit == "" { + return "", "no systemd_unit declared in app.md `service:` block", -1, + fmt.Errorf("no systemd_unit") + } + + if t.PCID == selfPC || t.IsLocalOnly { + args := []string{"restart", t.SystemdUnit} + if t.SystemdScope == "user" { + args = append([]string{"--user"}, args...) + } + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + cmd := exec.CommandContext(ctx, "systemctl", args...) + var so, se strings.Builder + cmd.Stdout = &so + cmd.Stderr = &se + err := cmd.Run() + code := 0 + if exitErr, ok := err.(*exec.ExitError); ok { + code = exitErr.ExitCode() + err = nil + } else if err != nil { + return so.String(), se.String(), -1, err + } + return so.String(), se.String(), code, nil + } + + // Remote via SSH. + conn := infra.SSHConn{Host: t.PCID} + cmd := fmt.Sprintf("systemctl restart %s", shellEscape(t.SystemdUnit)) + if t.SystemdScope == "user" { + cmd = "systemctl --user restart " + shellEscape(t.SystemdUnit) + } + stdout, stderr, code, err := infra.SSHExec(conn, cmd) + return stdout, stderr, code, err +} + +func shellEscape(s string) string { + // Conservative single-quote escape for arguments embedded in remote commands. + return "'" + strings.ReplaceAll(s, "'", "'\\''") + "'" +} diff --git a/db.go b/db.go new file mode 100644 index 0000000..56a70c2 --- /dev/null +++ b/db.go @@ -0,0 +1,145 @@ +package main + +import ( + "database/sql" + "embed" + "fmt" + "time" + + "fn-registry/functions/infra" +) + +//go:embed migrations/*.sql +var migrationsFS embed.FS + +// openOpsDB opens the operations.db and applies pending migrations. +func openOpsDB(path string) (*sql.DB, error) { + db, err := sql.Open("sqlite3", path+"?_journal_mode=WAL&_busy_timeout=5000") + if err != nil { + return nil, fmt.Errorf("open ops db: %w", err) + } + if err := infra.ApplyVersionedMigrations(db, migrationsFS, "migrations"); err != nil { + db.Close() + return nil, fmt.Errorf("apply migrations: %w", err) + } + return db, nil +} + +// upsertState writes the latest check into service_state. When `overall` changed +// vs previous row, also appends a row to service_transition for audit. +func upsertState(db *sql.DB, c ServiceCheck) error { + now := time.Now().Unix() + + var prevOverall string + row := db.QueryRow( + "SELECT overall FROM service_state WHERE app_id = ? AND pc_id = ?", + c.AppID, c.PCID, + ) + if err := row.Scan(&prevOverall); err != nil && err != sql.ErrNoRows { + return err + } + + changeTS := now + if prevOverall == c.Overall { + // Preserve last_change_ts. + var oldChange int64 + _ = db.QueryRow( + "SELECT last_change_ts FROM service_state WHERE app_id = ? AND pc_id = ?", + c.AppID, c.PCID, + ).Scan(&oldChange) + if oldChange > 0 { + changeTS = oldChange + } + } + + listening := 0 + if c.PortListening { + listening = 1 + } + + _, err := db.Exec(` + INSERT INTO service_state + (app_id, pc_id, systemd_state, port_listening, http_status, http_latency_ms, + last_check_ts, last_change_ts, last_error, overall) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(app_id, pc_id) DO UPDATE SET + systemd_state = excluded.systemd_state, + port_listening = excluded.port_listening, + http_status = excluded.http_status, + http_latency_ms = excluded.http_latency_ms, + last_check_ts = excluded.last_check_ts, + last_change_ts = excluded.last_change_ts, + last_error = excluded.last_error, + overall = excluded.overall + `, c.AppID, c.PCID, c.SystemdState, listening, c.HTTPStatus, c.HTTPLatencyMs, + now, changeTS, c.Error, c.Overall) + if err != nil { + return fmt.Errorf("upsert state: %w", err) + } + + if prevOverall != "" && prevOverall != c.Overall { + _, err = db.Exec(` + INSERT INTO service_transition (ts, app_id, pc_id, from_state, to_state, detail) + VALUES (?, ?, ?, ?, ?, ?) + `, now, c.AppID, c.PCID, prevOverall, c.Overall, c.Error) + } + return err +} + +// StateRow is one row of /api/services output. +type StateRow struct { + AppID string `json:"app_id"` + AppName string `json:"app_name"` + PCID string `json:"pc_id"` + IsSelf bool `json:"is_self"` + Reachable bool `json:"reachable"` + Runtime string `json:"runtime"` + Port int `json:"port"` + HealthEndpoint string `json:"health_endpoint"` + SystemdUnit string `json:"systemd_unit"` + SystemdScope string `json:"systemd_scope"` + SystemdState string `json:"systemd_state"` + PortListening bool `json:"port_listening"` + HTTPStatus int `json:"http_status"` + HTTPLatencyMs int `json:"http_latency_ms"` + LastCheckTS int64 `json:"last_check_ts"` + LastChangeTS int64 `json:"last_change_ts"` + LastError string `json:"last_error"` + Overall string `json:"overall"` +} + +// loadStates returns the latest snapshot from service_state joined with the +// declared service metadata from registry.db (already merged in memory by the +// caller through the targetsCache). +func loadStates(opsDB *sql.DB, targets []Target, selfPC string) ([]StateRow, error) { + rows := make([]StateRow, 0, len(targets)) + for _, t := range targets { + r := StateRow{ + AppID: t.AppID, + AppName: t.Name, + PCID: t.PCID, + IsSelf: t.PCID == selfPC, + Runtime: t.Runtime, + Port: t.Port, + HealthEndpoint: t.HealthEndpoint, + SystemdUnit: t.SystemdUnit, + SystemdScope: t.SystemdScope, + Overall: "unknown", + } + var listening int + err := opsDB.QueryRow(` + SELECT systemd_state, port_listening, http_status, http_latency_ms, + last_check_ts, last_change_ts, last_error, overall + FROM service_state WHERE app_id = ? AND pc_id = ? + `, t.AppID, t.PCID).Scan( + &r.SystemdState, &listening, &r.HTTPStatus, &r.HTTPLatencyMs, + &r.LastCheckTS, &r.LastChangeTS, &r.LastError, &r.Overall, + ) + if err != nil && err != sql.ErrNoRows { + return nil, err + } + r.PortListening = listening != 0 + rows = append(rows, r) + } + return rows, nil +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..90c2c3f --- /dev/null +++ b/go.mod @@ -0,0 +1,48 @@ +module services_api + +go 1.25.0 + +require fn-registry v0.0.0 + +require ( + github.com/ClickHouse/ch-go v0.71.0 // indirect + github.com/ClickHouse/clickhouse-go/v2 v2.44.0 // indirect + github.com/andybalholm/brotli v1.2.0 // indirect + github.com/apache/arrow-go/v18 v18.1.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/go-faster/city v1.0.1 // indirect + github.com/go-faster/errors v0.7.1 // indirect + github.com/go-viper/mapstructure/v2 v2.2.1 // indirect + github.com/goccy/go-json v0.10.5 // indirect + github.com/google/flatbuffers v25.1.24+incompatible // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/jackc/pgpassfile v1.0.0 // indirect + github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect + github.com/jackc/pgx/v5 v5.9.1 // indirect + github.com/jackc/puddle/v2 v2.2.2 // indirect + github.com/klauspost/compress v1.18.3 // indirect + github.com/klauspost/cpuid/v2 v2.2.9 // indirect + github.com/marcboeker/go-duckdb v1.8.5 // indirect + github.com/mattn/go-sqlite3 v1.14.37 + github.com/paulmach/orb v0.12.0 // indirect + github.com/pierrec/lz4/v4 v4.1.25 // indirect + github.com/segmentio/asm v1.2.1 // indirect + github.com/shopspring/decimal v1.4.0 // indirect + github.com/zeebo/xxh3 v1.0.2 // indirect + go.opentelemetry.io/otel v1.41.0 // indirect + go.opentelemetry.io/otel/trace v1.41.0 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/crypto v0.50.0 // indirect + golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c // indirect + golang.org/x/mod v0.34.0 // indirect + golang.org/x/sync v0.20.0 // indirect + golang.org/x/sys v0.43.0 // indirect + golang.org/x/telemetry v0.0.0-20260311193753-579e4da9a98c // indirect + golang.org/x/text v0.36.0 // indirect + golang.org/x/tools v0.43.0 // indirect + golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + nhooyr.io/websocket v1.8.17 // indirect +) + +replace fn-registry => ../../ diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..e592006 --- /dev/null +++ b/go.sum @@ -0,0 +1,174 @@ +github.com/ClickHouse/ch-go v0.71.0 h1:bUdZ/EZj/LcVHsMqaRUP2holqygrPWQKeMjc6nZoyRM= +github.com/ClickHouse/ch-go v0.71.0/go.mod h1:NwbNc+7jaqfY58dmdDUbG4Jl22vThgx1cYjBw0vtgXw= +github.com/ClickHouse/clickhouse-go/v2 v2.44.0 h1:9pxs5pRwIvhni5BDRPn/n5A8DeUod5TnBaeulFBX8EQ= +github.com/ClickHouse/clickhouse-go/v2 v2.44.0/go.mod h1:giJfUVlMkcfUEPVfRpt51zZaGEx9i17gCos8gBl392c= +github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ= +github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY= +github.com/apache/arrow-go/v18 v18.1.0 h1:agLwJUiVuwXZdwPYVrlITfx7bndULJ/dggbnLFgDp/Y= +github.com/apache/arrow-go/v18 v18.1.0/go.mod h1:tigU/sIgKNXaesf5d7Y95jBBKS5KsxTqYBKXFsvKzo0= +github.com/apache/thrift v0.21.0 h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE= +github.com/apache/thrift v0.21.0/go.mod h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-faster/city v1.0.1 h1:4WAxSZ3V2Ws4QRDrscLEDcibJY8uf41H6AhXDrNDcGw= +github.com/go-faster/city v1.0.1/go.mod h1:jKcUJId49qdW3L1qKHH/3wPeUstCVpVSXTM6vO3VcTw= +github.com/go-faster/errors v0.7.1 h1:MkJTnDoEdi9pDabt1dpWf7AA8/BaSYZqibYyhZ20AYg= +github.com/go-faster/errors v0.7.1/go.mod h1:5ySTjWFiphBs07IKuiL69nxdfd5+fzh1u7FPGZP2quo= +github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss= +github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= +github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/flatbuffers v25.1.24+incompatible h1:4wPqL3K7GzBd1CwyhSd3usxLKOaJN/AC6puCca6Jm7o= +github.com/google/flatbuffers v25.1.24+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgx/v5 v5.9.1 h1:uwrxJXBnx76nyISkhr33kQLlUqjv7et7b9FjCen/tdc= +github.com/jackc/pgx/v5 v5.9.1/go.mod h1:mal1tBGAFfLHvZzaYh77YS/eC6IX9OWbRV1QIIM0Jn4= +github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= +github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= +github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= +github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw= +github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= +github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY= +github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/marcboeker/go-duckdb v1.8.5 h1:tkYp+TANippy0DaIOP5OEfBEwbUINqiFqgwMQ44jME0= +github.com/marcboeker/go-duckdb v1.8.5/go.mod h1:6mK7+WQE4P4u5AFLvVBmhFxY5fvhymFptghgJX6B+/8= +github.com/mattn/go-sqlite3 v1.14.37 h1:3DOZp4cXis1cUIpCfXLtmlGolNLp2VEqhiB/PARNBIg= +github.com/mattn/go-sqlite3 v1.14.37/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= +github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= +github.com/paulmach/orb v0.12.0 h1:z+zOwjmG3MyEEqzv92UN49Lg1JFYx0L9GpGKNVDKk1s= +github.com/paulmach/orb v0.12.0/go.mod h1:5mULz1xQfs3bmQm63QEJA6lNGujuRafwA5S/EnuLaLU= +github.com/paulmach/protoscan v0.2.1/go.mod h1:SpcSwydNLrxUGSDvXvO0P7g7AuhJ7lcKfDlhJCDw2gY= +github.com/pierrec/lz4/v4 v4.1.25 h1:kocOqRffaIbU5djlIBr7Wh+cx82C0vtFb0fOurZHqD0= +github.com/pierrec/lz4/v4 v4.1.25/go.mod h1:EoQMVJgeeEOMsCqCzqFm2O0cJvljX2nGZjcRIPL34O4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/segmentio/asm v1.2.1 h1:DTNbBqs57ioxAD4PrArqftgypG4/qNpXoJx8TVXxPR0= +github.com/segmentio/asm v1.2.1/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs= +github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= +github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= +github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= +github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +go.mongodb.org/mongo-driver v1.11.4/go.mod h1:PTSz5yu21bkT/wXpkS7WR5f0ddqw5quethTUn9WM+2g= +go.opentelemetry.io/otel v1.41.0 h1:YlEwVsGAlCvczDILpUXpIpPSL/VPugt7zHThEMLce1c= +go.opentelemetry.io/otel v1.41.0/go.mod h1:Yt4UwgEKeT05QbLwbyHXEwhnjxNO6D8L5PQP51/46dE= +go.opentelemetry.io/otel/trace v1.41.0 h1:Vbk2co6bhj8L59ZJ6/xFTskY+tGAbOnCtQGVVa9TIN0= +go.opentelemetry.io/otel/trace v1.41.0/go.mod h1:U1NU4ULCoxeDKc09yCWdWe+3QoyweJcISEVa1RBzOis= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI= +golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q= +golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c h1:KL/ZBHXgKGVmuZBZ01Lt57yE5ws8ZPSkkihmEyq7FXc= +golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.34.0 h1:xIHgNUUnW6sYkcM5Jleh05DvLOtwc6RitGHbDk4akRI= +golang.org/x/mod v0.34.0/go.mod h1:ykgH52iCZe79kzLLMhyCUzhMci+nQj+0XkbXpNYtVjY= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= +golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/telemetry v0.0.0-20260311193753-579e4da9a98c h1:6a8FdnNk6bTXBjR4AGKFgUKuo+7GnR3FX5L7CbveeZc= +golang.org/x/telemetry v0.0.0-20260311193753-579e4da9a98c/go.mod h1:TpUTTEp9frx7rTdLpC9gFG9kdI7zVLFTFFlqaH2Cncw= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= +golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.43.0 h1:12BdW9CeB3Z+J/I/wj34VMl8X+fEXBxVR90JeMX5E7s= +golang.org/x/tools v0.43.0/go.mod h1:uHkMso649BX2cZK6+RpuIPXS3ho2hZo4FVwfoy1vIk0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= +gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +nhooyr.io/websocket v1.8.17 h1:KEVeLJkUywCKVsnLIDlD/5gtayKp8VoCkksHCGGfT9Y= +nhooyr.io/websocket v1.8.17/go.mod h1:rN9OFWIUwuxg4fR5tELlYC04bXYowCP9GX47ivo2l+c= diff --git a/main.go b/main.go new file mode 100644 index 0000000..68224f0 --- /dev/null +++ b/main.go @@ -0,0 +1,362 @@ +package main + +import ( + "context" + "database/sql" + "encoding/json" + "flag" + "fmt" + "log" + "net/http" + "os" + "path/filepath" + "strings" + "sync" + "time" + + _ "github.com/mattn/go-sqlite3" +) + +type Server struct { + registryRoot string + selfPC string + opsDB *sql.DB + + mu sync.RWMutex + targets []Target +} + +func main() { + addr := flag.String("bind", "127.0.0.1:8485", "HTTP bind address") + registry := flag.String("registry", defaultRegistryRoot(), "fn_registry root") + opsPath := flag.String("db", "", "operations.db path (default: /operations.db)") + interval := flag.Duration("interval", 15*time.Second, "check loop interval") + once := flag.Bool("once", false, "run one check cycle and exit (smoke test)") + flag.Parse() + + if *opsPath == "" { + exe, _ := os.Executable() + *opsPath = filepath.Join(filepath.Dir(exe), "operations.db") + } + + selfPC := readSelfPC() + log.Printf("services_api: selfPC=%q registry=%q ops=%q", selfPC, *registry, *opsPath) + + db, err := openOpsDB(*opsPath) + if err != nil { + log.Fatalf("open ops db: %v", err) + } + defer db.Close() + + srv := &Server{registryRoot: *registry, selfPC: selfPC, opsDB: db} + if err := srv.refreshTargets(); err != nil { + log.Fatalf("load targets: %v", err) + } + + if *once { + srv.runOneCycle() + log.Printf("once: completed %d checks", len(srv.targets)) + return + } + + // Background loop. + go srv.checkLoop(*interval) + + mux := http.NewServeMux() + mux.HandleFunc("/api/health", srv.handleHealth) + mux.HandleFunc("/api/services", srv.handleServices) + mux.HandleFunc("/api/check", srv.handleForceCheck) + mux.HandleFunc("/api/pcs", srv.handlePCs) + mux.HandleFunc("/api/action/", srv.handleAction) + + log.Printf("services_api listening on %s", *addr) + if err := http.ListenAndServe(*addr, mux); err != nil { + log.Fatal(err) + } +} + +func defaultRegistryRoot() string { + if v := os.Getenv("FN_REGISTRY_ROOT"); v != "" { + return v + } + exe, _ := os.Executable() + // Walk up looking for registry.db. + dir := filepath.Dir(exe) + for i := 0; i < 6; i++ { + if _, err := os.Stat(filepath.Join(dir, "registry.db")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + break + } + dir = parent + } + return "/home/lucas/fn_registry" +} + +func readSelfPC() string { + home, _ := os.UserHomeDir() + b, err := os.ReadFile(filepath.Join(home, ".fn_pc")) + if err != nil { + return "unknown" + } + return strings.TrimSpace(string(b)) +} + +func (s *Server) refreshTargets() error { + ts, err := loadTargets(s.registryRoot) + if err != nil { + return err + } + s.mu.Lock() + s.targets = ts + s.mu.Unlock() + return nil +} + +func (s *Server) runOneCycle() { + s.mu.RLock() + targets := append([]Target(nil), s.targets...) + s.mu.RUnlock() + + // Parallel probes (each remote SSH call can stall up to ~10s). + // Cap concurrency to avoid overwhelming SSH agent / sockets. + const maxParallel = 8 + sem := make(chan struct{}, maxParallel) + results := make(chan ServiceCheck, len(targets)) + var wg sync.WaitGroup + + for _, t := range targets { + wg.Add(1) + sem <- struct{}{} + go func(tt Target) { + defer wg.Done() + defer func() { <-sem }() + done := make(chan ServiceCheck, 1) + go func() { done <- probeTarget(tt, s.selfPC) }() + select { + case c := <-done: + results <- c + case <-time.After(20 * time.Second): + results <- ServiceCheck{ + AppID: tt.AppID, + PCID: tt.PCID, + SystemdState: "timeout", + Error: "probe exceeded 20s", + Overall: "no-route", + } + } + }(t) + } + + go func() { wg.Wait(); close(results) }() + + for c := range results { + if err := upsertState(s.opsDB, c); err != nil { + log.Printf("upsert %s/%s: %v", c.AppID, c.PCID, err) + } + } +} + +func (s *Server) checkLoop(interval time.Duration) { + tick := time.NewTicker(interval) + defer tick.Stop() + // First cycle immediately, then on interval. + s.runOneCycle() + refreshEvery := 5 * time.Minute + lastRefresh := time.Now() + for { + <-tick.C + if time.Since(lastRefresh) > refreshEvery { + if err := s.refreshTargets(); err != nil { + log.Printf("refresh targets: %v", err) + } + lastRefresh = time.Now() + } + s.runOneCycle() + } +} + +// HTTP handlers ------------------------------------------------------------ + +func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) { + writeJSON(w, http.StatusOK, map[string]any{"status": "ok", "self_pc": s.selfPC}) +} + +func (s *Server) handleServices(w http.ResponseWriter, r *http.Request) { + s.mu.RLock() + targets := append([]Target(nil), s.targets...) + s.mu.RUnlock() + + rows, err := loadStates(s.opsDB, targets, s.selfPC) + if err != nil { + writeJSON(w, http.StatusInternalServerError, map[string]any{"error": err.Error()}) + return + } + + // Compute reachable per PC: a PC is unreachable when all its services + // returned no-route (we never got systemd state) AND it is not selfPC. + noRouteByPC := map[string]int{} + totalByPC := map[string]int{} + for _, r := range rows { + totalByPC[r.PCID]++ + if r.Overall == "no-route" { + noRouteByPC[r.PCID]++ + } + } + reachableByPC := map[string]bool{} + for pc, total := range totalByPC { + if pc == s.selfPC { + reachableByPC[pc] = true + continue + } + reachableByPC[pc] = noRouteByPC[pc] < total + } + for i := range rows { + rows[i].Reachable = reachableByPC[rows[i].PCID] + } + + writeJSON(w, http.StatusOK, map[string]any{ + "self_pc": s.selfPC, + "services": rows, + "ts": time.Now().Unix(), + "pcs": pcsSummary(reachableByPC, totalByPC, s.selfPC), + }) +} + +// pcsSummary builds the array used in /api/services and /api/pcs. +func pcsSummary(reachable map[string]bool, total map[string]int, selfPC string) []map[string]any { + out := make([]map[string]any, 0, len(total)) + for pc, n := range total { + out = append(out, map[string]any{ + "pc_id": pc, + "is_self": pc == selfPC, + "reachable": reachable[pc], + "services_count": n, + }) + } + return out +} + +func (s *Server) handleForceCheck(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second) + defer cancel() + done := make(chan struct{}) + go func() { + s.runOneCycle() + close(done) + }() + select { + case <-done: + s.handleServices(w, r) + case <-ctx.Done(): + writeJSON(w, http.StatusGatewayTimeout, map[string]any{"error": "check loop timed out"}) + } +} + +// handleAction handles POST /api/action/{app_id}/{pc_id}/restart. +// Currently the only supported action is "restart"; we keep the URL +// shape general for stop/start in v2. +func (s *Server) handleAction(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + // URL: /api/action/{app_id}/{pc_id}/{verb} + path := strings.TrimPrefix(r.URL.Path, "/api/action/") + parts := strings.Split(path, "/") + if len(parts) != 3 { + writeJSON(w, http.StatusBadRequest, map[string]any{ + "error": "expected /api/action/{app_id}/{pc_id}/{verb}", + }) + return + } + appID, pcID, verb := parts[0], parts[1], parts[2] + if verb != "restart" { + writeJSON(w, http.StatusBadRequest, map[string]any{ + "error": "unsupported verb (allowed: restart)", + }) + return + } + + // Lookup target. + s.mu.RLock() + var t *Target + for i := range s.targets { + if s.targets[i].AppID == appID && s.targets[i].PCID == pcID { + t = &s.targets[i] + break + } + } + s.mu.RUnlock() + if t == nil { + writeJSON(w, http.StatusNotFound, map[string]any{ + "error": "target not found", + }) + return + } + + stdout, stderr, code, err := restartTarget(*t, s.selfPC) + if err != nil && code == -1 { + writeJSON(w, http.StatusInternalServerError, map[string]any{ + "app_id": appID, + "pc_id": pcID, + "verb": verb, + "error": err.Error(), + "stderr": stderr, + "exit_code": code, + }) + return + } + status := http.StatusOK + if code != 0 { + status = http.StatusBadGateway + } + // Trigger a check cycle so UI refresh shows the new state immediately. + go s.runOneCycle() + + writeJSON(w, status, map[string]any{ + "app_id": appID, + "pc_id": pcID, + "verb": verb, + "exit_code": code, + "stdout": strings.TrimSpace(stdout), + "stderr": strings.TrimSpace(stderr), + }) +} + +func (s *Server) handlePCs(w http.ResponseWriter, r *http.Request) { + s.mu.RLock() + targets := append([]Target(nil), s.targets...) + s.mu.RUnlock() + + counts := map[string]int{} + for _, t := range targets { + counts[t.PCID]++ + } + out := make([]map[string]any, 0, len(counts)) + for pc, n := range counts { + out = append(out, map[string]any{ + "pc_id": pc, + "is_self": pc == s.selfPC, + "services_count": n, + }) + } + writeJSON(w, http.StatusOK, map[string]any{"pcs": out, "self_pc": s.selfPC}) +} + +func writeJSON(w http.ResponseWriter, code int, v any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + _ = enc.Encode(v) +} + +// stub to silence unused import +var _ = fmt.Sprintf diff --git a/migrations/001_init.sql b/migrations/001_init.sql new file mode 100644 index 0000000..32592b5 --- /dev/null +++ b/migrations/001_init.sql @@ -0,0 +1,29 @@ +-- 001: schema inicial de services_api operations.db (issue 0106) +-- Estado actual + transitions append-only. + +CREATE TABLE IF NOT EXISTS service_state ( + app_id TEXT NOT NULL, + pc_id TEXT NOT NULL, + systemd_state TEXT NOT NULL DEFAULT '', -- active|inactive|failed|unknown|no-route + port_listening INTEGER NOT NULL DEFAULT 0, + http_status INTEGER NOT NULL DEFAULT 0, + http_latency_ms INTEGER NOT NULL DEFAULT 0, + last_check_ts INTEGER NOT NULL DEFAULT 0, + last_change_ts INTEGER NOT NULL DEFAULT 0, + last_error TEXT NOT NULL DEFAULT '', + overall TEXT NOT NULL DEFAULT 'unknown', -- ok|degraded|down|no-route|unknown + PRIMARY KEY (app_id, pc_id) +); + +CREATE TABLE IF NOT EXISTS service_transition ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ts INTEGER NOT NULL, + app_id TEXT NOT NULL, + pc_id TEXT NOT NULL, + from_state TEXT NOT NULL, + to_state TEXT NOT NULL, + detail TEXT NOT NULL DEFAULT '' +); + +CREATE INDEX IF NOT EXISTS idx_transition_app_pc_ts + ON service_transition(app_id, pc_id, ts DESC); diff --git a/operations.db b/operations.db new file mode 100644 index 0000000..6f7ac3a Binary files /dev/null and b/operations.db differ diff --git a/operations.db-shm b/operations.db-shm new file mode 100644 index 0000000..7da6cf3 Binary files /dev/null and b/operations.db-shm differ diff --git a/operations.db-wal b/operations.db-wal new file mode 100644 index 0000000..1496ff7 Binary files /dev/null and b/operations.db-wal differ diff --git a/services_api b/services_api new file mode 100755 index 0000000..8350520 Binary files /dev/null and b/services_api differ