feat: soporte Android/Termux — battery_file + log_file (file-IPC sin exec), tail de logcat, workarounds DNS(1.1.1.1)+CA(SSL_CERT_FILE), procesos Android-safe
This commit is contained in:
@@ -16,6 +16,14 @@ type Config struct {
|
|||||||
User string `json:"user"` // basic-auth user, shared by metrics and logs (empty disables auth)
|
User string `json:"user"` // basic-auth user, shared by metrics and logs (empty disables auth)
|
||||||
Pass string `json:"pass"` // basic-auth password
|
Pass string `json:"pass"` // basic-auth password
|
||||||
IntervalSec int `json:"interval_sec"` // metrics push period in seconds (default 15)
|
IntervalSec int `json:"interval_sec"` // metrics push period in seconds (default 15)
|
||||||
|
// Android/Termux exec workaround: the standard Go binary cannot exec
|
||||||
|
// subprocesses there (seccomp blocks pidfd_open with SIGSYS). When set, the
|
||||||
|
// agent reads battery JSON from this file (written by a shell helper) instead
|
||||||
|
// of running termux-battery-status itself.
|
||||||
|
BatteryFile string `json:"battery_file"`
|
||||||
|
// When set, the agent tails this log file (written by a shell `logcat`
|
||||||
|
// helper) and ships it to Loki, instead of exec-ing journald/logcat.
|
||||||
|
LogFile string `json:"log_file"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// defaultConfig returns the baseline configuration: the machine hostname as the
|
// defaultConfig returns the baseline configuration: the machine hostname as the
|
||||||
@@ -48,6 +56,12 @@ func loadConfig(path string) (Config, error) {
|
|||||||
if v := os.Getenv("FLEET_LOKI_URL"); v != "" {
|
if v := os.Getenv("FLEET_LOKI_URL"); v != "" {
|
||||||
cfg.LokiURL = v
|
cfg.LokiURL = v
|
||||||
}
|
}
|
||||||
|
if v := os.Getenv("FLEET_BATTERY_FILE"); v != "" {
|
||||||
|
cfg.BatteryFile = v
|
||||||
|
}
|
||||||
|
if v := os.Getenv("FLEET_LOG_FILE"); v != "" {
|
||||||
|
cfg.LogFile = v
|
||||||
|
}
|
||||||
if v := os.Getenv("FLEET_USER"); v != "" {
|
if v := os.Getenv("FLEET_USER"); v != "" {
|
||||||
cfg.User = v
|
cfg.User = v
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,16 +2,52 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"io"
|
||||||
"log"
|
"log"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"fn-registry/functions/infra"
|
"fn-registry/functions/infra"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// shipLogs picks the right log source for the platform and ships to Loki:
|
||||||
|
// systemd journald on Linux servers, logcat on Android/Termux. If neither is
|
||||||
|
// available it logs once and returns, leaving metrics shipping unaffected.
|
||||||
|
//
|
||||||
|
// Binaries are located with os.Stat (not exec.LookPath) and run by absolute
|
||||||
|
// path: on Android the faccessat2 syscall that LookPath uses is blocked by
|
||||||
|
// seccomp and crashes the process with SIGSYS.
|
||||||
|
func shipLogs(ctx context.Context, cfg Config) {
|
||||||
|
// Android/Termux: a shell helper writes `logcat` output to cfg.LogFile and
|
||||||
|
// we tail it (no exec, which seccomp would kill via pidfd_open SIGSYS).
|
||||||
|
if cfg.LogFile != "" {
|
||||||
|
shipFileTail(ctx, cfg, cfg.LogFile, "logcat")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Linux servers: read the systemd journal directly.
|
||||||
|
if p := findBin("/usr/bin/journalctl", "/bin/journalctl"); p != "" {
|
||||||
|
shipJournald(ctx, cfg, p)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Print("logs: no log source (no log_file nor journalctl), log shipping disabled")
|
||||||
|
}
|
||||||
|
|
||||||
|
// findBin probes each candidate path in order with os.Stat and returns the
// first one for which the stat succeeds. It returns "" when no candidate can
// be stat-ed (including when no candidates are given).
func findBin(candidates ...string) string {
	for i := 0; i < len(candidates); i++ {
		if _, statErr := os.Stat(candidates[i]); statErr != nil {
			continue
		}
		return candidates[i]
	}
	return ""
}
|
||||||
|
|
||||||
// journalEntry is the subset of fields we read from `journalctl -o json`.
|
// journalEntry is the subset of fields we read from `journalctl -o json`.
|
||||||
type journalEntry struct {
|
type journalEntry struct {
|
||||||
Message json.RawMessage `json:"MESSAGE"`
|
Message json.RawMessage `json:"MESSAGE"`
|
||||||
@@ -64,8 +100,8 @@ type logLine struct {
|
|||||||
// batches, grouped into one stream per unit. It returns when ctx is cancelled.
|
// batches, grouped into one stream per unit. It returns when ctx is cancelled.
|
||||||
// If journalctl is not available (e.g. on Android/Termux) it logs once and exits
|
// If journalctl is not available (e.g. on Android/Termux) it logs once and exits
|
||||||
// without error, leaving metrics shipping unaffected.
|
// without error, leaving metrics shipping unaffected.
|
||||||
func shipJournald(ctx context.Context, cfg Config) {
|
func shipJournald(ctx context.Context, cfg Config, binPath string) {
|
||||||
cmd := exec.CommandContext(ctx, "journalctl", "-f", "-o", "json", "-n", "0", "--no-pager")
|
cmd := exec.CommandContext(ctx, binPath, "-f", "-o", "json", "-n", "0", "--no-pager")
|
||||||
stdout, err := cmd.StdoutPipe()
|
stdout, err := cmd.StdoutPipe()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("logs: cannot pipe journalctl: %v", err)
|
log.Printf("logs: cannot pipe journalctl: %v", err)
|
||||||
@@ -146,3 +182,96 @@ func shipJournald(ctx context.Context, cfg Config) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// shipFileTail tails a growing log file (written by an external shell helper,
|
||||||
|
// e.g. `logcat -v epoch` on Android/Termux) and pushes new lines to Loki under
|
||||||
|
// one stream (job=<job>). It does NO exec — only file reads — so it is safe on
|
||||||
|
// Android where exec from Go is blocked by seccomp. Handles truncation/rotation
|
||||||
|
// by detecting a shrinking file and restarting from offset 0.
|
||||||
|
func shipFileTail(ctx context.Context, cfg Config, path, job string) {
|
||||||
|
log.Printf("logs: tailing %s for Loki (job=%s)", path, job)
|
||||||
|
|
||||||
|
var offset int64
|
||||||
|
if fi, err := os.Stat(path); err == nil {
|
||||||
|
offset = fi.Size() // skip pre-existing history on first start
|
||||||
|
}
|
||||||
|
labels := map[string]string{"instance": cfg.Node, "job": job}
|
||||||
|
|
||||||
|
ticker := time.NewTicker(3 * time.Second)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
f, err := os.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fi, err := f.Stat()
|
||||||
|
if err != nil {
|
||||||
|
f.Close()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if fi.Size() < offset {
|
||||||
|
offset = 0 // file was truncated or rotated
|
||||||
|
}
|
||||||
|
if fi.Size() == offset {
|
||||||
|
f.Close()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, err := f.Seek(offset, io.SeekStart); err != nil {
|
||||||
|
f.Close()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
data, err := io.ReadAll(f)
|
||||||
|
f.Close()
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Only consume up to the last complete line; keep the remainder for
|
||||||
|
// the next tick so we never ship a half-written line.
|
||||||
|
lastNL := bytes.LastIndexByte(data, '\n')
|
||||||
|
if lastNL < 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
offset += int64(lastNL + 1)
|
||||||
|
|
||||||
|
var ts []int64
|
||||||
|
var ln []string
|
||||||
|
now := time.Now().UnixNano()
|
||||||
|
for _, raw := range strings.Split(string(data[:lastNL]), "\n") {
|
||||||
|
raw = strings.TrimSpace(raw)
|
||||||
|
if raw == "" || strings.HasPrefix(raw, "---------") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
t, msg := parseLogcatEpoch(raw, now)
|
||||||
|
ts = append(ts, t)
|
||||||
|
ln = append(ln, msg)
|
||||||
|
}
|
||||||
|
if len(ln) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := infra.PushLokiStream(cfg.LokiURL, cfg.User, cfg.Pass, labels, ts, ln); err != nil {
|
||||||
|
log.Printf("logs: file push error (%d lines): %v", len(ln), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseLogcatEpoch splits a `-v epoch` logcat line into a nanosecond timestamp
// and the remaining text. Lines look like: "1609459200.123 1234 1235 I Tag: msg".
//
// The first space-delimited token must be a plain decimal epoch value
// ("seconds" or "seconds.fraction", digits only). It is converted with integer
// arithmetic so the result is exact to the nanosecond; fractional digits beyond
// the ninth are truncated. Tokens such as "NaN", "Inf", signed, hexadecimal or
// exponent-form numbers are not treated as timestamps.
//
// On any parse failure (no space, empty or malformed token, value too large for
// an int64 nanosecond count) it returns the fallback timestamp and the raw line
// unchanged.
func parseLogcatEpoch(raw string, fallback int64) (int64, string) {
	sp := strings.IndexByte(raw, ' ')
	if sp <= 0 {
		return fallback, raw
	}
	ns, ok := epochTokenNanos(raw[:sp])
	if !ok {
		return fallback, raw
	}
	return ns, strings.TrimSpace(raw[sp:])
}

// epochTokenNanos converts a "seconds[.fraction]" decimal token into
// nanoseconds since the Unix epoch. ok is false when the token is not made of
// ASCII digits around at most one dot, has no integer part, or would overflow.
func epochTokenNanos(tok string) (ns int64, ok bool) {
	const (
		nanosPerSec = 1000000000
		// Largest seconds value for which secs*nanosPerSec + 999999999 still
		// fits in an int64.
		maxSecs = (1<<63-1)/nanosPerSec - 1
	)

	whole, frac := tok, ""
	if dot := strings.IndexByte(tok, '.'); dot >= 0 {
		whole, frac = tok[:dot], tok[dot+1:]
	}
	if whole == "" {
		return 0, false
	}
	// Reject signs, exponents, a second dot, and anything else non-numeric.
	for _, part := range [2]string{whole, frac} {
		for i := 0; i < len(part); i++ {
			if part[i] < '0' || part[i] > '9' {
				return 0, false
			}
		}
	}

	secs, err := strconv.ParseInt(whole, 10, 64)
	if err != nil || secs > maxSecs {
		return 0, false
	}

	// Scale the fraction to exactly nine digits: right-pad with zeros and
	// ignore any digits past nanosecond resolution.
	var nanos int64
	for i := 0; i < 9; i++ {
		nanos *= 10
		if i < len(frac) {
			nanos += int64(frac[i] - '0')
		}
	}
	return secs*nanosPerSec + nanos, true
}
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"flag"
|
"flag"
|
||||||
"log"
|
"log"
|
||||||
|
"net"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"syscall"
|
"syscall"
|
||||||
@@ -26,11 +27,45 @@ import (
|
|||||||
"fn-registry/functions/infra"
|
"fn-registry/functions/infra"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// androidWorkarounds fixes two things that break a standard cross-compiled Go
// binary on Android/Termux. It is a no-op unless ANDROID_ROOT or ANDROID_DATA
// is set in the environment.
//
//   - DNS: the pure-Go resolver reads /etc/resolv.conf, which on Android does
//     not reflect the system DNS, so lookups hit ::1:53 and fail. The default
//     resolver is replaced with one that always talks to 1.1.1.1:53. The
//     transport requested by the resolver (UDP, or TCP when it retries a
//     truncated answer) is honoured; only the server address is overridden.
//   - TLS: Go's default CA bundle paths don't exist on Android, so HTTPS fails
//     with "certificate signed by unknown authority". Unless SSL_CERT_FILE is
//     already set, it is pointed at the Termux ca-certificates bundle under
//     $PREFIX (default /data/data/com.termux/files/usr) when that file exists.
//     crypto/x509 reads the variable lazily on first use.
//
// It must run before the first DNS lookup or TLS handshake. It replaces
// net.DefaultResolver and may modify the process environment.
func androidWorkarounds() {
	if os.Getenv("ANDROID_ROOT") == "" && os.Getenv("ANDROID_DATA") == "" {
		return
	}

	net.DefaultResolver = &net.Resolver{
		PreferGo: true,
		// The address chosen by the resolver comes from the unusable
		// resolv.conf and is ignored; the network is passed through so TCP
		// retries really use TCP.
		Dial: func(ctx context.Context, network, _ string) (net.Conn, error) {
			d := net.Dialer{Timeout: 5 * time.Second}
			return d.DialContext(ctx, network, "1.1.1.1:53")
		},
	}
	log.Print("android: using 1.1.1.1 resolver")

	if os.Getenv("SSL_CERT_FILE") != "" {
		// Respect an explicit operator choice.
		return
	}
	prefix := os.Getenv("PREFIX")
	if prefix == "" {
		prefix = "/data/data/com.termux/files/usr"
	}
	cert := prefix + "/etc/tls/cert.pem"
	if _, err := os.Stat(cert); err != nil {
		log.Printf("android: Termux CA bundle not usable (%v), leaving SSL_CERT_FILE unset", err)
		return
	}
	if err := os.Setenv("SSL_CERT_FILE", cert); err != nil {
		log.Printf("android: cannot set SSL_CERT_FILE: %v", err)
		return
	}
	log.Printf("android: using Termux CA bundle %s", cert)
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
configPath := flag.String("config", "", "path to JSON config file")
|
configPath := flag.String("config", "", "path to JSON config file")
|
||||||
once := flag.Bool("once", false, "collect and push a single time, then exit (useful for testing)")
|
once := flag.Bool("once", false, "collect and push a single time, then exit (useful for testing)")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
|
androidWorkarounds()
|
||||||
|
|
||||||
cfg, err := loadConfig(*configPath)
|
cfg, err := loadConfig(*configPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("config: %v", err)
|
log.Fatalf("config: %v", err)
|
||||||
@@ -58,9 +93,10 @@ func main() {
|
|||||||
cancel()
|
cancel()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Optional: ship systemd journal logs to Loki in the background.
|
// Optional: ship logs to Loki in the background (journald on Linux,
|
||||||
|
// logcat on Android/Termux).
|
||||||
if cfg.LokiURL != "" {
|
if cfg.LokiURL != "" {
|
||||||
go shipJournald(ctx, cfg)
|
go shipLogs(ctx, cfg)
|
||||||
}
|
}
|
||||||
|
|
||||||
ticker := time.NewTicker(time.Duration(cfg.IntervalSec) * time.Second)
|
ticker := time.NewTicker(time.Duration(cfg.IntervalSec) * time.Second)
|
||||||
@@ -89,6 +125,10 @@ func pushOnce(cfg Config) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
// Battery metrics are best-effort. On Android/Termux the agent cannot exec
|
||||||
|
// termux-battery-status (seccomp), so a shell helper writes its JSON to
|
||||||
|
// cfg.BatteryFile and we parse that here; elsewhere we collect directly.
|
||||||
|
samples = append(samples, batterySamples(cfg)...)
|
||||||
body := infra.FormatPromExposition(samples, time.Now().UnixMilli())
|
body := infra.FormatPromExposition(samples, time.Now().UnixMilli())
|
||||||
if err := infra.PushPromRemote(cfg.HubURL, cfg.User, cfg.Pass, body, map[string]string{"instance": cfg.Node}); err != nil {
|
if err := infra.PushPromRemote(cfg.HubURL, cfg.User, cfg.Pass, body, map[string]string{"instance": cfg.Node}); err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -96,3 +136,25 @@ func pushOnce(cfg Config) error {
|
|||||||
log.Printf("pushed %d samples", len(samples))
|
log.Printf("pushed %d samples", len(samples))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// batterySamples returns battery metrics, reading them from a JSON file when
|
||||||
|
// cfg.BatteryFile is set (Android path) or collecting them directly otherwise.
|
||||||
|
// Always best-effort: any error yields no samples rather than failing the push.
|
||||||
|
func batterySamples(cfg Config) []infra.PromSample {
|
||||||
|
if cfg.BatteryFile != "" {
|
||||||
|
data, err := os.ReadFile(cfg.BatteryFile)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
s, err := infra.BatterySamplesFromJSON(data)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
s, err := infra.CollectBatteryMetrics()
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user