package infra

import (
	"fmt"
	"os"
	"sort"
	"strconv"
	"time"

	"github.com/shirou/gopsutil/v4/cpu"
	"github.com/shirou/gopsutil/v4/disk"
	"github.com/shirou/gopsutil/v4/host"
	"github.com/shirou/gopsutil/v4/load"
	"github.com/shirou/gopsutil/v4/mem"
	"github.com/shirou/gopsutil/v4/net"
	"github.com/shirou/gopsutil/v4/process"
	"github.com/shirou/gopsutil/v4/sensors"
)

// isAndroidHost reports whether the current host is Android (Termux
// included). It is used to steer clear of gopsutil code paths that invoke
// os.FindProcess -> pidfd_open, a syscall blocked by Android's seccomp filter,
// which kills the process with SIGSYS.
func isAndroidHost() bool {
	// Either Android-specific environment variable being set is sufficient.
	for _, key := range []string{"ANDROID_ROOT", "ANDROID_DATA"} {
		if os.Getenv(key) != "" {
			return true
		}
	}

	// Otherwise fall back to probing for the Android build properties file.
	_, statErr := os.Stat("/system/build.prop")
	return statErr == nil
}

// pseudoFstypes lists virtual filesystems that do not represent real storage;
// partitions of these types are skipped when collecting partition metrics.
var pseudoFstypes = map[string]bool{
	"tmpfs":      true,
	"devtmpfs":   true,
	"overlay":    true,
	"squashfs":   true,
	"proc":       true,
	"sysfs":      true,
	"cgroup":     true,
	"cgroup2":    true,
	"devpts":     true,
	"mqueue":     true,
	"debugfs":    true,
	"tracefs":    true,
	"fusectl":    true,
	"configfs":   true,
	"pstore":     true,
	"bpf":        true,
	"securityfs": true,
}

// CollectHostMetrics gathers metrics for the current host (CPU, memory, swap,
// disk, network, temperatures and processes) and returns them as a slice of
// PromSample using simplified node_exporter-style names.
//
// It is fault tolerant: every collector group runs in its own block with local
// error handling. When a secondary collector fails (network, temperatures,
// etc.) that group is skipped without aborting. An error is returned only when
// the basic host information (uptime) cannot be read, which is considered the
// bare minimum.
//
// It works on Linux amd64 and Android/Termux (linux arm64): temperatures are
// best-effort and are omitted when no sensors are available (typical on
// Android).
func CollectHostMetrics() ([]PromSample, error) { var samples []PromSample // --- Host basico: uptime (imprescindible, error si falla) --- uptime, err := host.Uptime() if err != nil { return nil, fmt.Errorf("collect host uptime: %w", err) } samples = append(samples, PromSample{ Name: "node_uptime_seconds", Value: float64(uptime), }) // --- Load average (linux/darwin; best-effort) --- if avg, err := load.Avg(); err == nil && avg != nil { samples = append(samples, PromSample{Name: "node_load1", Value: avg.Load1}, PromSample{Name: "node_load5", Value: avg.Load5}, PromSample{Name: "node_load15", Value: avg.Load15}, ) } // --- CPU global (intervalo corto de muestreo) --- if pcts, err := cpu.Percent(200*time.Millisecond, false); err == nil && len(pcts) > 0 { samples = append(samples, PromSample{ Name: "node_cpu_percent", Value: pcts[0], }) } // --- CPU por nucleo --- if pcts, err := cpu.Percent(200*time.Millisecond, true); err == nil { for i, p := range pcts { samples = append(samples, PromSample{ Name: "node_cpu_core_percent", Labels: map[string]string{"core": strconv.Itoa(i)}, Value: p, }) } } // --- Memoria virtual --- if vm, err := mem.VirtualMemory(); err == nil && vm != nil { samples = append(samples, PromSample{Name: "node_mem_total_bytes", Value: float64(vm.Total)}, PromSample{Name: "node_mem_used_bytes", Value: float64(vm.Used)}, PromSample{Name: "node_mem_available_bytes", Value: float64(vm.Available)}, PromSample{Name: "node_mem_used_percent", Value: vm.UsedPercent}, ) } // --- Swap --- if sw, err := mem.SwapMemory(); err == nil && sw != nil { samples = append(samples, PromSample{Name: "node_swap_total_bytes", Value: float64(sw.Total)}, PromSample{Name: "node_swap_used_bytes", Value: float64(sw.Used)}, ) } // --- Particiones fisicas (ignora fstypes pseudo) --- if parts, err := disk.Partitions(false); err == nil { for _, p := range parts { if pseudoFstypes[p.Fstype] { continue } u, err := disk.Usage(p.Mountpoint) if err != nil || u == nil { continue } lbl := 
map[string]string{"mount": p.Mountpoint} samples = append(samples, PromSample{Name: "node_disk_total_bytes", Labels: lbl, Value: float64(u.Total)}, PromSample{Name: "node_disk_used_bytes", Labels: lbl, Value: float64(u.Used)}, PromSample{Name: "node_disk_used_percent", Labels: lbl, Value: u.UsedPercent}, ) } } // --- Contadores I/O por dispositivo --- if io, err := disk.IOCounters(); err == nil { for dev, c := range io { lbl := map[string]string{"device": dev} samples = append(samples, PromSample{Name: "node_disk_read_bytes", Labels: lbl, Value: float64(c.ReadBytes)}, PromSample{Name: "node_disk_write_bytes", Labels: lbl, Value: float64(c.WriteBytes)}, ) } } // --- Red por interfaz (excluye loopback "lo") --- if nics, err := net.IOCounters(true); err == nil { for _, n := range nics { if n.Name == "lo" { continue } lbl := map[string]string{"iface": n.Name} samples = append(samples, PromSample{Name: "node_net_recv_bytes", Labels: lbl, Value: float64(n.BytesRecv)}, PromSample{Name: "node_net_sent_bytes", Labels: lbl, Value: float64(n.BytesSent)}, PromSample{Name: "node_net_recv_errs", Labels: lbl, Value: float64(n.Errin)}, PromSample{Name: "node_net_sent_errs", Labels: lbl, Value: float64(n.Errout)}, ) } } // --- Temperaturas (best-effort; omite el grupo si falla o no hay sensores) --- if temps, err := sensors.SensorsTemperatures(); err == nil { for _, t := range temps { if t.SensorKey == "" { continue } samples = append(samples, PromSample{ Name: "node_temp_celsius", Labels: map[string]string{"sensor": t.SensorKey}, Value: t.Temperature, }) } } // --- Procesos: total + top 5 por CPU --- // En Android (Termux) gopsutil process.Processes() llama internamente a // os.FindProcess, que usa el syscall pidfd_open bloqueado por el seccomp de // Android (mata el proceso con SIGSYS, no recuperable). Alli contamos los // PIDs con process.Pids() (que solo lee /proc, sin FindProcess) y omitimos // el top por CPU. 
if isAndroidHost() { if pids, err := process.Pids(); err == nil { samples = append(samples, PromSample{ Name: "node_procs_total", Value: float64(len(pids)), }) } } else if procs, err := process.Processes(); err == nil { samples = append(samples, PromSample{ Name: "node_procs_total", Value: float64(len(procs)), }) type procStat struct { pid int32 name string cpu float64 mem float32 } stats := make([]procStat, 0, len(procs)) for _, p := range procs { cpuPct, err := p.CPUPercent() if err != nil { continue } name, err := p.Name() if err != nil { name = "" } memPct, err := p.MemoryPercent() if err != nil { memPct = 0 } stats = append(stats, procStat{pid: p.Pid, name: name, cpu: cpuPct, mem: memPct}) } sort.Slice(stats, func(i, j int) bool { return stats[i].cpu > stats[j].cpu }) top := stats if len(top) > 5 { top = top[:5] } for _, s := range top { lbl := map[string]string{ "pid": strconv.Itoa(int(s.pid)), "name": s.name, } samples = append(samples, PromSample{Name: "node_proc_cpu_percent", Labels: lbl, Value: s.cpu}, PromSample{Name: "node_proc_mem_percent", Labels: lbl, Value: float64(s.mem)}, ) } } return samples, nil }