feat: shipping de logs journald a Loki (config loki_url + shipper journalctl→PushLokiStream)
This commit is contained in:
@@ -11,10 +11,11 @@ import (
|
|||||||
// systemd drop-ins and for deploying the same binary to many nodes.
|
// systemd drop-ins and for deploying the same binary to many nodes.
|
||||||
type Config struct {
|
type Config struct {
|
||||||
Node string `json:"node"` // value of the "instance" label attached to every series
|
Node string `json:"node"` // value of the "instance" label attached to every series
|
||||||
HubURL string `json:"hub_url"` // full ingest URL, e.g. https://metrics-…/api/v1/import/prometheus
|
HubURL string `json:"hub_url"` // full metrics ingest URL, e.g. https://metrics-…/api/v1/import/prometheus
|
||||||
User string `json:"user"` // basic-auth user (empty disables auth)
|
LokiURL string `json:"loki_url"` // full Loki push URL, e.g. https://logs-…/loki/api/v1/push (empty disables log shipping)
|
||||||
|
User string `json:"user"` // basic-auth user, shared by metrics and logs (empty disables auth)
|
||||||
Pass string `json:"pass"` // basic-auth password
|
Pass string `json:"pass"` // basic-auth password
|
||||||
IntervalSec int `json:"interval_sec"` // push period in seconds (default 15)
|
IntervalSec int `json:"interval_sec"` // metrics push period in seconds (default 15)
|
||||||
}
|
}
|
||||||
|
|
||||||
// defaultConfig returns the baseline configuration: the machine hostname as the
|
// defaultConfig returns the baseline configuration: the machine hostname as the
|
||||||
@@ -44,6 +45,9 @@ func loadConfig(path string) (Config, error) {
|
|||||||
if v := os.Getenv("FLEET_HUB_URL"); v != "" {
|
if v := os.Getenv("FLEET_HUB_URL"); v != "" {
|
||||||
cfg.HubURL = v
|
cfg.HubURL = v
|
||||||
}
|
}
|
||||||
|
if v := os.Getenv("FLEET_LOKI_URL"); v != "" {
|
||||||
|
cfg.LokiURL = v
|
||||||
|
}
|
||||||
if v := os.Getenv("FLEET_USER"); v != "" {
|
if v := os.Getenv("FLEET_USER"); v != "" {
|
||||||
cfg.User = v
|
cfg.User = v
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,148 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"log"
|
||||||
|
"os/exec"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"fn-registry/functions/infra"
|
||||||
|
)
|
||||||
|
|
||||||
|
// journalEntry is the subset of fields we read from `journalctl -o json`.
type journalEntry struct {
	Message  json.RawMessage `json:"MESSAGE"`
	Unit     string          `json:"_SYSTEMD_UNIT"`
	Comm     string          `json:"_COMM"`
	Realtime string          `json:"__REALTIME_TIMESTAMP"` // microseconds since epoch, as a string
	Priority string          `json:"PRIORITY"`
}

// message decodes MESSAGE, which journald serialises either as a JSON string
// (normal text) or as an array of byte values (binary/non-UTF8 logs). It
// returns "" when the field is absent, null, or has any other shape, including
// an array holding a number outside 0..255, which cannot be a byte.
func (e journalEntry) message() string {
	if len(e.Message) == 0 {
		return ""
	}
	if e.Message[0] == '"' {
		var s string
		if json.Unmarshal(e.Message, &s) == nil {
			return s
		}
	}
	// Decode into []int rather than []byte: encoding/json expects a base64
	// string for []byte, not an array of numbers.
	var vals []int
	if json.Unmarshal(e.Message, &vals) != nil {
		return ""
	}
	b := make([]byte, len(vals))
	for i, v := range vals {
		// Reject instead of letting byte(v) wrap around silently.
		if v < 0 || v > 255 {
			return ""
		}
		b[i] = byte(v)
	}
	return string(b)
}

// tsNs returns the entry timestamp in nanoseconds. It falls back to now when
// __REALTIME_TIMESTAMP is missing, unparsable, not positive, or too large to
// be converted to nanoseconds without overflowing int64.
func (e journalEntry) tsNs(now int64) int64 {
	// Largest microsecond value whose multiplication by 1000 fits in int64.
	const maxMicros = (1<<63 - 1) / 1000

	us, err := strconv.ParseInt(e.Realtime, 10, 64)
	if err != nil || us <= 0 || us > maxMicros {
		return now
	}
	return us * 1000
}
|
||||||
|
|
||||||
|
// logLine is a single journal message queued for shipping to Loki.
type logLine struct {
	ts   int64  // timestamp in nanoseconds, as produced by journalEntry.tsNs
	unit string // value of the "unit" label of the Loki stream this line goes to
	line string // decoded message text
}
|
||||||
|
|
||||||
|
// shipJournald follows the systemd journal and pushes new lines to Loki in
|
||||||
|
// batches, grouped into one stream per unit. It returns when ctx is cancelled.
|
||||||
|
// If journalctl is not available (e.g. on Android/Termux) it logs once and exits
|
||||||
|
// without error, leaving metrics shipping unaffected.
|
||||||
|
func shipJournald(ctx context.Context, cfg Config) {
|
||||||
|
cmd := exec.CommandContext(ctx, "journalctl", "-f", "-o", "json", "-n", "0", "--no-pager")
|
||||||
|
stdout, err := cmd.StdoutPipe()
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("logs: cannot pipe journalctl: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := cmd.Start(); err != nil {
|
||||||
|
log.Printf("logs: journalctl unavailable, log shipping disabled: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Print("logs: journald shipping started")
|
||||||
|
|
||||||
|
lines := make(chan logLine, 2000)
|
||||||
|
go func() {
|
||||||
|
scanner := bufio.NewScanner(stdout)
|
||||||
|
scanner.Buffer(make([]byte, 1024*1024), 4*1024*1024)
|
||||||
|
for scanner.Scan() {
|
||||||
|
var e journalEntry
|
||||||
|
if json.Unmarshal(scanner.Bytes(), &e) != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
msg := e.message()
|
||||||
|
if msg == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
unit := e.Unit
|
||||||
|
if unit == "" {
|
||||||
|
unit = e.Comm
|
||||||
|
}
|
||||||
|
if unit == "" {
|
||||||
|
unit = "kernel"
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case lines <- logLine{ts: e.tsNs(time.Now().UnixNano()), unit: unit, line: msg}:
|
||||||
|
default: // buffer full: drop rather than block the reader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
ticker := time.NewTicker(5 * time.Second)
|
||||||
|
defer ticker.Stop()
|
||||||
|
var buf []logLine
|
||||||
|
|
||||||
|
flush := func() {
|
||||||
|
if len(buf) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
byUnit := map[string][]logLine{}
|
||||||
|
for _, it := range buf {
|
||||||
|
byUnit[it.unit] = append(byUnit[it.unit], it)
|
||||||
|
}
|
||||||
|
for unit, items := range byUnit {
|
||||||
|
ts := make([]int64, len(items))
|
||||||
|
ln := make([]string, len(items))
|
||||||
|
for i, it := range items {
|
||||||
|
ts[i] = it.ts
|
||||||
|
ln[i] = it.line
|
||||||
|
}
|
||||||
|
labels := map[string]string{"instance": cfg.Node, "job": "journald", "unit": unit}
|
||||||
|
if err := infra.PushLokiStream(cfg.LokiURL, cfg.User, cfg.Pass, labels, ts, ln); err != nil {
|
||||||
|
log.Printf("logs: push error (unit=%s, %d lines): %v", unit, len(items), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buf = buf[:0]
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case it := <-lines:
|
||||||
|
buf = append(buf, it)
|
||||||
|
if len(buf) >= 500 {
|
||||||
|
flush()
|
||||||
|
}
|
||||||
|
case <-ticker.C:
|
||||||
|
flush()
|
||||||
|
case <-ctx.Done():
|
||||||
|
flush()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -15,6 +15,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"flag"
|
"flag"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
@@ -46,14 +47,27 @@ func main() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
ticker := time.NewTicker(time.Duration(cfg.IntervalSec) * time.Second)
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
defer ticker.Stop()
|
defer cancel()
|
||||||
|
|
||||||
stop := make(chan os.Signal, 1)
|
stop := make(chan os.Signal, 1)
|
||||||
signal.Notify(stop, syscall.SIGINT, syscall.SIGTERM)
|
signal.Notify(stop, syscall.SIGINT, syscall.SIGTERM)
|
||||||
|
go func() {
|
||||||
|
<-stop
|
||||||
|
log.Print("shutting down")
|
||||||
|
cancel()
|
||||||
|
}()
|
||||||
|
|
||||||
// Push once right away so a freshly started node shows up immediately,
|
// Optional: ship systemd journal logs to Loki in the background.
|
||||||
// then keep pushing on every tick.
|
if cfg.LokiURL != "" {
|
||||||
|
go shipJournald(ctx, cfg)
|
||||||
|
}
|
||||||
|
|
||||||
|
ticker := time.NewTicker(time.Duration(cfg.IntervalSec) * time.Second)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
// Push metrics once right away so a freshly started node shows up
|
||||||
|
// immediately, then keep pushing on every tick.
|
||||||
if err := pushOnce(cfg); err != nil {
|
if err := pushOnce(cfg); err != nil {
|
||||||
log.Printf("push error: %v", err)
|
log.Printf("push error: %v", err)
|
||||||
}
|
}
|
||||||
@@ -63,8 +77,7 @@ func main() {
|
|||||||
if err := pushOnce(cfg); err != nil {
|
if err := pushOnce(cfg); err != nil {
|
||||||
log.Printf("push error: %v", err)
|
log.Printf("push error: %v", err)
|
||||||
}
|
}
|
||||||
case <-stop:
|
case <-ctx.Done():
|
||||||
log.Print("shutting down")
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user