diff --git a/hub/dashboards/fleet-overview.json b/hub/dashboards/fleet-overview.json index 457af03..9aea326 100644 --- a/hub/dashboards/fleet-overview.json +++ b/hub/dashboards/fleet-overview.json @@ -4,10 +4,26 @@ "tags": ["fleet"], "timezone": "browser", "schemaVersion": 39, - "version": 2, + "version": 3, "refresh": "15s", "time": { "from": "now-3h", "to": "now" }, - "templating": { "list": [] }, + "templating": { + "list": [ + { + "name": "role", + "label": "Rol", + "type": "query", + "datasource": { "type": "prometheus", "uid": "victoriametrics" }, + "query": "label_values(node_uptime_seconds, role)", + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "current": { "text": ["All"], "value": ["$__all"] }, + "sort": 1 + } + ] + }, "panels": [ { "id": 1, @@ -15,7 +31,7 @@ "title": "Nodos reportando", "gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ { "refId": "A", "expr": "count(group by(instance) (last_over_time(node_uptime_seconds[2m])))", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "targets": [ { "refId": "A", "expr": "count(group by(instance) (last_over_time(node_uptime_seconds{role=~\"$role\"}[2m])))", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "none", "color": { "mode": "fixed", "fixedColor": "green" } }, "overrides": [] }, "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "colorMode": "value", "graphMode": "none", "textMode": "value" } }, @@ -25,7 +41,7 @@ "title": "Nodos caídos", "gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ { "refId": "A", "expr": "(max_over_time((count(group by(instance) (last_over_time(node_uptime_seconds[2m]))))[24h:1m])) - count(group by(instance) (last_over_time(node_uptime_seconds[2m])))", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "targets": [ { "refId": "A", "expr": "(max_over_time((count(group by(instance) (last_over_time(node_uptime_seconds{role=~\"$role\"}[2m]))))[24h:1m])) - count(group by(instance) (last_over_time(node_uptime_seconds{role=~\"$role\"}[2m])))", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 1 } ] } }, "overrides": [] }, "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "colorMode": "background", "graphMode": "none", "textMode": "value" } }, @@ -35,7 +51,7 @@ "title": "CPU máx", "gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ { "refId": "A", "expr": "max(node_cpu_percent)", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "targets": [ { "refId": "A", "expr": "max(node_cpu_percent{role=~\"$role\"})", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 70 }, { "color": "red", "value": 90 } ] } }, "overrides": [] }, "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "colorMode": "value", "graphMode": "area" } }, @@ -45,7 +61,7 @@ "title": "RAM máx", "gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ { "refId": "A", "expr": "max(node_mem_used_percent)", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "targets": [ { "refId": "A", "expr": "max(node_mem_used_percent{role=~\"$role\"})", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 75 }, { "color": "red", "value": 90 } ] } }, "overrides": [] }, "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "colorMode": "value", "graphMode": "area" } }, @@ -55,7 +71,7 @@ "title": "Disco máx", "gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ { "refId": "A", "expr": "max(node_disk_used_percent)", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "targets": [ { "refId": "A", "expr": "max(node_disk_used_percent{role=~\"$role\"})", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 80 }, { "color": "red", "value": 90 } ] } }, "overrides": [] }, "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "colorMode": "value", "graphMode": "area" } }, @@ -65,7 +81,7 @@ "title": "Load máx", "gridPos": { "h": 4, "w": 4, "x": 20, "y": 0 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ { "refId": "A", "expr": "max(node_load1)", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "targets": [ { "refId": "A", "expr": "max(node_load1{role=~\"$role\"})", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "fixed", "fixedColor": "blue" } }, "overrides": [] }, "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "colorMode": "value", "graphMode": "area" } }, @@ -75,7 +91,7 @@ "title": "CPU % por nodo", "gridPos": { "h": 6, "w": 12, "x": 0, "y": 4 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ { "refId": "A", "expr": "node_cpu_percent", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "targets": [ { "refId": "A", "expr": "node_cpu_percent{role=~\"$role\"}", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "custom": { "drawStyle": "line", "fillOpacity": 10, "showPoints": "never", "lineWidth": 2 } }, "overrides": [] }, "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull", "max"] }, "tooltip": { "mode": "multi", "sort": "desc" } } }, @@ -85,7 +101,7 @@ "title": "RAM % por nodo", "gridPos": { "h": 6, "w": 12, "x": 12, "y": 4 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ { "refId": "A", "expr": "node_mem_used_percent", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "targets": [ { "refId": "A", "expr": "node_mem_used_percent{role=~\"$role\"}", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "custom": { "drawStyle": "line", "fillOpacity": 10, "showPoints": "never", "lineWidth": 2 } }, "overrides": [] }, "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull", "max"] }, "tooltip": { "mode": "multi", "sort": "desc" } } }, @@ -95,7 +111,7 @@ "title": "Load 1m por nodo", "gridPos": { "h": 6, "w": 12, "x": 0, "y": 10 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ { "refId": "A", "expr": "node_load1", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "targets": [ { "refId": "A", "expr": "node_load1{role=~\"$role\"}", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "short", "min": 0, "custom": { "drawStyle": "line", "fillOpacity": 10, "showPoints": "never", "lineWidth": 2 } }, "overrides": [] }, "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull", "max"] }, "tooltip": { "mode": "multi", "sort": "desc" } } }, @@ -105,7 +121,7 @@ "title": "Disco usado % (máx por nodo)", "gridPos": { "h": 6, "w": 12, "x": 12, "y": 10 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ { "refId": "A", "expr": "max by(instance) (node_disk_used_percent)", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "targets": [ { "refId": "A", "expr": "max by(instance) (node_disk_used_percent{role=~\"$role\"})", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "custom": { "drawStyle": "line", "fillOpacity": 10, "showPoints": "never", "lineWidth": 2 } }, "overrides": [] }, "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull", "max"] }, "tooltip": { "mode": "multi", "sort": "desc" } } }, @@ -115,7 +131,7 @@ "title": "Red recibida (sum por nodo)", "gridPos": { "h": 6, "w": 12, "x": 0, "y": 16 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ { "refId": "A", "expr": "sum by(instance) (rate(node_net_recv_bytes[2m]))", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "targets": [ { "refId": "A", "expr": "sum by(instance) (rate(node_net_recv_bytes{role=~\"$role\"}[2m]))", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "Bps", "min": 0, "custom": { "drawStyle": "line", "fillOpacity": 10, "showPoints": "never", "lineWidth": 2 } }, "overrides": [] }, "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull", "max"] }, "tooltip": { "mode": "multi", "sort": "desc" } } }, @@ -125,7 +141,7 @@ "title": "Red enviada (sum por nodo)", "gridPos": { "h": 6, "w": 12, "x": 12, "y": 16 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ { "refId": "A", "expr": "sum by(instance) (rate(node_net_sent_bytes[2m]))", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "targets": [ { "refId": "A", "expr": "sum by(instance) (rate(node_net_sent_bytes{role=~\"$role\"}[2m]))", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "Bps", "min": 0, "custom": { "drawStyle": "line", "fillOpacity": 10, "showPoints": "never", "lineWidth": 2 } }, "overrides": [] }, "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull", "max"] }, "tooltip": { "mode": "multi", "sort": "desc" } } } diff --git a/hub/deploy_agent.sh b/hub/deploy_agent.sh index e0286ff..18d892a 100755 --- a/hub/deploy_agent.sh +++ b/hub/deploy_agent.sh @@ -1,9 +1,10 @@ #!/usr/bin/env bash # Despliega metrics_agent en un nodo remoto como servicio systemd. # -# Uso: ./deploy_agent.sh [arch] +# Uso: ./deploy_agent.sh [arch] # node_name : valor de la label "instance" en Grafana (ej. homer) # ssh_host : alias SSH del nodo (debe existir en ~/.ssh/config) +# role : label "role" para filtrar en Grafana (local | vps | movil) # arch : amd64 (default) | arm64 # # Requisitos: @@ -14,9 +15,10 @@ # - sudo -n (sin password) disponible en el nodo remoto. set -euo pipefail -NODE="${1:?uso: deploy_agent.sh [arch]}" -HOST="${2:?uso: deploy_agent.sh [arch]}" -ARCH="${3:-amd64}" +NODE="${1:?uso: deploy_agent.sh [arch]}" +HOST="${2:?uso: deploy_agent.sh [arch]}" +ROLE="${3:?uso: deploy_agent.sh [arch]}" +ARCH="${4:-amd64}" HUB="https://metrics-dxaqj3ina6eqd5pjt85wkrrj.organic-machine.com/api/v1/import/prometheus" LOKI="https://logs-wmaxecsjcfnocz81d5luca92.organic-machine.com/loki/api/v1/push" @@ -29,7 +31,7 @@ echo ">> copiando binario a $HOST" scp -q -o BatchMode=yes "$BIN" "$HOST:/tmp/metrics_agent" echo ">> instalando servicio en $NODE ($HOST)" -ssh -o BatchMode=yes "$HOST" "NODE='$NODE' PW='$PW' HUB='$HUB' LOKI='$LOKI' bash -s" <<'OUTER' +ssh -o BatchMode=yes "$HOST" "NODE='$NODE' PW='$PW' HUB='$HUB' LOKI='$LOKI' ROLE='$ROLE' bash -s" <<'OUTER' set -e sudo -n mkdir -p /opt/fleet-agent /etc/fleet-agent sudo -n mv /tmp/metrics_agent /opt/fleet-agent/metrics_agent @@ -41,7 +43,8 @@ sudo -n tee /etc/fleet-agent/agent.json >/dev/null <> instalando servicio Termux en $NODE ($HOST)" -ssh -o BatchMode=yes "$HOST" "NODE='$NODE' PW='$PW' HUB='$HUB' LOKI='$LOKI' bash -s" <<'OUTER' +ssh -o BatchMode=yes "$HOST" "NODE='$NODE' PW='$PW' HUB='$HUB' LOKI='$LOKI' ROLE='$ROLE' bash -s" <<'OUTER' set -e PREFIX=/data/data/com.termux/files/usr HM=/data/data/com.termux/files/home @@ -41,6 +42,7 @@ cat > "$HM/fleet-agent/agent.json" <