feat: metrics_agent inicial (collect+format+push host metrics a VictoriaMetrics)
This commit is contained in:
@@ -0,0 +1,85 @@
|
||||
// Command metrics_agent collects host metrics and pushes them to a
|
||||
// VictoriaMetrics / Prometheus-compatible ingest endpoint on a fixed interval.
|
||||
//
|
||||
// It is the per-node component of the fleet_monitoring project and is meant to
|
||||
// run as a systemd service on every machine of the fleet. It does no work of
|
||||
// its own beyond orchestration: the actual capability comes from three registry
|
||||
// functions in fn-registry/functions/infra:
|
||||
//
|
||||
// - CollectHostMetrics -> []infra.PromSample (CPU/mem/swap/disk/net/temp/procs)
|
||||
// - FormatPromExposition -> Prometheus exposition text
|
||||
// - PushPromRemote -> POST the text with optional basic auth + extra labels
|
||||
//
|
||||
// The "instance" label is attached at push time so a single binary, configured
|
||||
// only with its node name and the hub endpoint, identifies itself in Grafana.
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"fn-registry/functions/infra"
|
||||
)
|
||||
|
||||
func main() {
|
||||
configPath := flag.String("config", "", "path to JSON config file")
|
||||
once := flag.Bool("once", false, "collect and push a single time, then exit (useful for testing)")
|
||||
flag.Parse()
|
||||
|
||||
cfg, err := loadConfig(*configPath)
|
||||
if err != nil {
|
||||
log.Fatalf("config: %v", err)
|
||||
}
|
||||
if cfg.HubURL == "" {
|
||||
log.Fatal("config: hub_url is required (set it in the config file or via FLEET_HUB_URL)")
|
||||
}
|
||||
log.Printf("metrics_agent starting: node=%q hub=%q interval=%ds", cfg.Node, cfg.HubURL, cfg.IntervalSec)
|
||||
|
||||
if *once {
|
||||
if err := pushOnce(cfg); err != nil {
|
||||
log.Fatalf("push: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(time.Duration(cfg.IntervalSec) * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
stop := make(chan os.Signal, 1)
|
||||
signal.Notify(stop, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
// Push once right away so a freshly started node shows up immediately,
|
||||
// then keep pushing on every tick.
|
||||
if err := pushOnce(cfg); err != nil {
|
||||
log.Printf("push error: %v", err)
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
if err := pushOnce(cfg); err != nil {
|
||||
log.Printf("push error: %v", err)
|
||||
}
|
||||
case <-stop:
|
||||
log.Print("shutting down")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pushOnce runs a single collect -> format -> push cycle.
|
||||
func pushOnce(cfg Config) error {
|
||||
samples, err := infra.CollectHostMetrics()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
body := infra.FormatPromExposition(samples, time.Now().UnixMilli())
|
||||
if err := infra.PushPromRemote(cfg.HubURL, cfg.User, cfg.Pass, body, map[string]string{"instance": cfg.Node}); err != nil {
|
||||
return err
|
||||
}
|
||||
log.Printf("pushed %d samples", len(samples))
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user