feat: unibus_exporter — daemon que sondea /healthz del cluster unibus y empuja estado+posture a VictoriaMetrics
Compone parse_unibus_health + format_prom_exposition + push_prom_remote del registry (grupo fleet-metrics). Un solo exporter scrapea los 3 nodos por IP pública con la CA del cluster; labels node/instance por serie. Config JSON con secretos fuera de argv. Incluye systemd unit y unibus.example.json.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,106 @@
|
||||
package main
|
||||
|
||||
import (
	"encoding/json"
	"fmt"
	"os"
	"strconv"
	"strings"
)
|
||||
|
||||
// Node describes one member of the unibus cluster that the exporter probes.
type Node struct {
	// Name is the logical node label, e.g. "magnus", "homer" or "datardos".
	Name string `json:"name"`

	// URL is the node's full /healthz endpoint,
	// e.g. https://135.125.201.30:8470/healthz.
	URL string `json:"url"`
}
|
||||
|
||||
// Config is the exporter runtime configuration, read from a JSON file. It holds
|
||||
// the list of unibus nodes to probe, the cluster CA used to verify their TLS,
|
||||
// the VictoriaMetrics ingest endpoint and its basic-auth credentials.
|
||||
//
|
||||
// Secrets (pass) live only in this file (chmod 600) or come from the environment
|
||||
// — never in argv. The example config carries a placeholder, not the real one.
|
||||
type Config struct {
|
||||
Nodes []Node `json:"nodes"` // unibus members to probe
|
||||
CACertPath string `json:"ca_cert_path"` // PEM CA of the unibus cluster, used to verify each node's TLS
|
||||
HubURL string `json:"hub_url"` // VictoriaMetrics import endpoint (…/api/v1/import/prometheus)
|
||||
User string `json:"user"` // basic-auth user for the hub (empty disables auth)
|
||||
Pass string `json:"pass"` // basic-auth password for the hub
|
||||
IntervalSec int `json:"interval_sec"` // scrape+push period in seconds (default 15)
|
||||
TimeoutSec int `json:"timeout_sec"` // per-node healthz GET timeout in seconds (default 8)
|
||||
Labels map[string]string `json:"labels"` // extra labels added to every series via extra_label (e.g. {"job":"unibus_exporter"})
|
||||
}
|
||||
|
||||
// loadConfig reads and validates the JSON config at path. Environment overrides:
|
||||
// UNIBUS_HUB_URL, UNIBUS_USER, UNIBUS_PASS, UNIBUS_CA_CERT, UNIBUS_INTERVAL.
|
||||
func loadConfig(path string) (Config, error) {
|
||||
cfg := Config{IntervalSec: 15, TimeoutSec: 8}
|
||||
if path != "" {
|
||||
b, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return cfg, err
|
||||
}
|
||||
if err := json.Unmarshal(b, &cfg); err != nil {
|
||||
return cfg, err
|
||||
}
|
||||
}
|
||||
if v := os.Getenv("UNIBUS_HUB_URL"); v != "" {
|
||||
cfg.HubURL = v
|
||||
}
|
||||
if v := os.Getenv("UNIBUS_USER"); v != "" {
|
||||
cfg.User = v
|
||||
}
|
||||
if v := os.Getenv("UNIBUS_PASS"); v != "" {
|
||||
cfg.Pass = v
|
||||
}
|
||||
if v := os.Getenv("UNIBUS_CA_CERT"); v != "" {
|
||||
cfg.CACertPath = v
|
||||
}
|
||||
if v := os.Getenv("UNIBUS_INTERVAL"); v != "" {
|
||||
if n, err := parsePositiveInt(v); err == nil {
|
||||
cfg.IntervalSec = n
|
||||
}
|
||||
}
|
||||
if cfg.IntervalSec <= 0 {
|
||||
cfg.IntervalSec = 15
|
||||
}
|
||||
if cfg.TimeoutSec <= 0 {
|
||||
cfg.TimeoutSec = 8
|
||||
}
|
||||
if cfg.Labels == nil {
|
||||
cfg.Labels = map[string]string{"job": "unibus_exporter"}
|
||||
} else if _, ok := cfg.Labels["job"]; !ok {
|
||||
cfg.Labels["job"] = "unibus_exporter"
|
||||
}
|
||||
return cfg, validate(cfg)
|
||||
}
|
||||
|
||||
func validate(cfg Config) error {
|
||||
if cfg.HubURL == "" {
|
||||
return fmt.Errorf("hub_url is required")
|
||||
}
|
||||
if len(cfg.Nodes) == 0 {
|
||||
return fmt.Errorf("at least one node is required")
|
||||
}
|
||||
if cfg.CACertPath == "" {
|
||||
return fmt.Errorf("ca_cert_path is required (PEM CA of the unibus cluster)")
|
||||
}
|
||||
for i, n := range cfg.Nodes {
|
||||
if n.Name == "" || n.URL == "" {
|
||||
return fmt.Errorf("node[%d]: name and url are required", i)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// parsePositiveInt parses s as a base-10 integer strictly greater than zero.
//
// Surrounding whitespace is tolerated, but the remaining text must be an
// integer in its entirety: inputs such as "5m", "1.5" or "15abc" are rejected
// rather than being truncated to their leading digits. It returns the
// strconv error for malformed or out-of-range input and a "not positive"
// error for zero or negative values.
func parsePositiveInt(s string) (int, error) {
	n, err := strconv.Atoi(strings.TrimSpace(s))
	if err != nil {
		return 0, err
	}
	if n <= 0 {
		return 0, fmt.Errorf("not positive")
	}
	return n, nil
}
|
||||
Reference in New Issue
Block a user