From a099488a9d478141bc63a20f7cc3db4a1217b7ac Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Sun, 7 Jun 2026 13:22:00 +0200 Subject: [PATCH] =?UTF-8?q?feat(hub):=20Loki=20(config+datasource),=20pane?= =?UTF-8?q?l=20de=20logs=20en=20node=20detail,=20stat=20nodos=20ca=C3=ADdo?= =?UTF-8?q?s=20+=20overview=20compacto,=20loki=5Furl=20en=20deploy=5Fagent?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hub/dashboards/fleet-node.json | 9 ++ hub/dashboards/fleet-overview.json | 90 +++++++++---------- hub/deploy_agent.sh | 7 +- hub/grafana-provisioning/loki-datasource.yaml | 10 +++ hub/loki-config.yaml | 42 +++++++++ 5 files changed, 111 insertions(+), 47 deletions(-) create mode 100644 hub/grafana-provisioning/loki-datasource.yaml create mode 100644 hub/loki-config.yaml diff --git a/hub/dashboards/fleet-node.json b/hub/dashboards/fleet-node.json index b7878d9..16f69c2 100644 --- a/hub/dashboards/fleet-node.json +++ b/hub/dashboards/fleet-node.json @@ -160,6 +160,15 @@ ], "fieldConfig": { "defaults": {}, "overrides": [ { "matcher": { "id": "byName", "options": "RAM %" }, "properties": [ { "id": "unit", "value": "percent" }, { "id": "custom.cellOptions", "value": { "type": "gauge" } }, { "id": "max", "value": 100 } ] } ] }, "options": { "sortBy": [ { "displayName": "RAM %", "desc": true } ] } + }, + { + "id": 13, + "type": "logs", + "title": "Logs (journald)", + "gridPos": { "h": 11, "w": 24, "x": 0, "y": 41 }, + "datasource": { "type": "loki", "uid": "loki" }, + "targets": [ { "refId": "A", "expr": "{instance=\"$node\", job=\"journald\"}", "datasource": { "type": "loki", "uid": "loki" } } ], + "options": { "showTime": true, "wrapLogMessage": true, "prettifyLogMessage": false, "enableLogDetails": true, "dedupStrategy": "none", "sortOrder": "Descending" } } ] } diff --git a/hub/dashboards/fleet-overview.json b/hub/dashboards/fleet-overview.json index 44d80c4..457af03 100644 --- a/hub/dashboards/fleet-overview.json +++ 
b/hub/dashboards/fleet-overview.json @@ -4,7 +4,7 @@ "tags": ["fleet"], "timezone": "browser", "schemaVersion": 39, - "version": 1, + "version": 2, "refresh": "15s", "time": { "from": "now-3h", "to": "now" }, "templating": { "list": [] }, @@ -12,60 +12,70 @@ { "id": 1, "type": "stat", - "title": "Nodos reportando (<2m)", - "gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 }, + "title": "Nodos reportando", + "gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ - { "refId": "A", "expr": "count(group by(instance) (last_over_time(node_uptime_seconds[2m])))", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } - ], + "targets": [ { "refId": "A", "expr": "count(group by(instance) (last_over_time(node_uptime_seconds[2m]))) or vector(0)", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "none", "color": { "mode": "fixed", "fixedColor": "green" } }, "overrides": [] }, "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "colorMode": "value", "graphMode": "none", "textMode": "value" } }, + { + "id": 11, + "type": "stat", + "title": "Nodos caídos", + "gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 }, + "datasource": { "type": "prometheus", "uid": "victoriametrics" }, + "targets": [ { "refId": "A", "expr": "(max_over_time((count(group by(instance) (last_over_time(node_uptime_seconds[2m]))))[24h:1m])) - (count(group by(instance) (last_over_time(node_uptime_seconds[2m]))) or vector(0))", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "fieldConfig": { "defaults": { "unit": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 1 } ] } }, "overrides": [] }, + "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "colorMode": "background", "graphMode": "none", "textMode": "value" } + }, { "id": 2, "type": 
"stat", - "title": "CPU máx flota", - "gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 }, + "title": "CPU máx", + "gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ - { "refId": "A", "expr": "max(node_cpu_percent)", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } - ], + "targets": [ { "refId": "A", "expr": "max(node_cpu_percent)", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 70 }, { "color": "red", "value": 90 } ] } }, "overrides": [] }, "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "colorMode": "value", "graphMode": "area" } }, { "id": 3, "type": "stat", - "title": "RAM máx flota", - "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 }, + "title": "RAM máx", + "gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ - { "refId": "A", "expr": "max(node_mem_used_percent)", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } - ], + "targets": [ { "refId": "A", "expr": "max(node_mem_used_percent)", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 75 }, { "color": "red", "value": 90 } ] } }, "overrides": [] }, "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "colorMode": "value", "graphMode": "area" } }, { "id": 4, "type": "stat", - "title": "Disco máx flota", - "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 }, + "title": "Disco máx", + "gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 }, "datasource": { "type": 
"prometheus", "uid": "victoriametrics" }, - "targets": [ - { "refId": "A", "expr": "max(node_disk_used_percent)", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } - ], + "targets": [ { "refId": "A", "expr": "max(node_disk_used_percent)", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 80 }, { "color": "red", "value": 90 } ] } }, "overrides": [] }, "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "colorMode": "value", "graphMode": "area" } }, + { + "id": 12, + "type": "stat", + "title": "Load máx", + "gridPos": { "h": 4, "w": 4, "x": 20, "y": 0 }, + "datasource": { "type": "prometheus", "uid": "victoriametrics" }, + "targets": [ { "refId": "A", "expr": "max(node_load1)", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], + "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "fixed", "fixedColor": "blue" } }, "overrides": [] }, + "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "colorMode": "value", "graphMode": "area" } + }, { "id": 5, "type": "timeseries", "title": "CPU % por nodo", - "gridPos": { "h": 9, "w": 12, "x": 0, "y": 4 }, + "gridPos": { "h": 6, "w": 12, "x": 0, "y": 4 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ - { "refId": "A", "expr": "node_cpu_percent", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } - ], + "targets": [ { "refId": "A", "expr": "node_cpu_percent", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "custom": { "drawStyle": "line", "fillOpacity": 10, "showPoints": "never", "lineWidth": 2 } 
}, "overrides": [] }, "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull", "max"] }, "tooltip": { "mode": "multi", "sort": "desc" } } }, @@ -73,11 +83,9 @@ "id": 6, "type": "timeseries", "title": "RAM % por nodo", - "gridPos": { "h": 9, "w": 12, "x": 12, "y": 4 }, + "gridPos": { "h": 6, "w": 12, "x": 12, "y": 4 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ - { "refId": "A", "expr": "node_mem_used_percent", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } - ], + "targets": [ { "refId": "A", "expr": "node_mem_used_percent", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "custom": { "drawStyle": "line", "fillOpacity": 10, "showPoints": "never", "lineWidth": 2 } }, "overrides": [] }, "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull", "max"] }, "tooltip": { "mode": "multi", "sort": "desc" } } }, @@ -85,11 +93,9 @@ "id": 7, "type": "timeseries", "title": "Load 1m por nodo", - "gridPos": { "h": 9, "w": 12, "x": 0, "y": 13 }, + "gridPos": { "h": 6, "w": 12, "x": 0, "y": 10 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ - { "refId": "A", "expr": "node_load1", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } - ], + "targets": [ { "refId": "A", "expr": "node_load1", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "short", "min": 0, "custom": { "drawStyle": "line", "fillOpacity": 10, "showPoints": "never", "lineWidth": 2 } }, "overrides": [] }, "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull", "max"] }, "tooltip": { "mode": "multi", "sort": "desc" } } }, @@ 
-97,11 +103,9 @@ "id": 8, "type": "timeseries", "title": "Disco usado % (máx por nodo)", - "gridPos": { "h": 9, "w": 12, "x": 12, "y": 13 }, + "gridPos": { "h": 6, "w": 12, "x": 12, "y": 10 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ - { "refId": "A", "expr": "max by(instance) (node_disk_used_percent)", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } - ], + "targets": [ { "refId": "A", "expr": "max by(instance) (node_disk_used_percent)", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "custom": { "drawStyle": "line", "fillOpacity": 10, "showPoints": "never", "lineWidth": 2 } }, "overrides": [] }, "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull", "max"] }, "tooltip": { "mode": "multi", "sort": "desc" } } }, @@ -109,11 +113,9 @@ "id": 9, "type": "timeseries", "title": "Red recibida (sum por nodo)", - "gridPos": { "h": 9, "w": 12, "x": 0, "y": 22 }, + "gridPos": { "h": 6, "w": 12, "x": 0, "y": 16 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ - { "refId": "A", "expr": "sum by(instance) (rate(node_net_recv_bytes[2m]))", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } - ], + "targets": [ { "refId": "A", "expr": "sum by(instance) (rate(node_net_recv_bytes[2m]))", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "Bps", "min": 0, "custom": { "drawStyle": "line", "fillOpacity": 10, "showPoints": "never", "lineWidth": 2 } }, "overrides": [] }, "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull", "max"] }, "tooltip": { "mode": "multi", "sort": "desc" } } }, @@ -121,11 +123,9 @@ "id": 10, "type": 
"timeseries", "title": "Red enviada (sum por nodo)", - "gridPos": { "h": 9, "w": 12, "x": 12, "y": 22 }, + "gridPos": { "h": 6, "w": 12, "x": 12, "y": 16 }, "datasource": { "type": "prometheus", "uid": "victoriametrics" }, - "targets": [ - { "refId": "A", "expr": "sum by(instance) (rate(node_net_sent_bytes[2m]))", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } - ], + "targets": [ { "refId": "A", "expr": "sum by(instance) (rate(node_net_sent_bytes[2m]))", "legendFormat": "{{instance}}", "datasource": { "type": "prometheus", "uid": "victoriametrics" } } ], "fieldConfig": { "defaults": { "unit": "Bps", "min": 0, "custom": { "drawStyle": "line", "fillOpacity": 10, "showPoints": "never", "lineWidth": 2 } }, "overrides": [] }, "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull", "max"] }, "tooltip": { "mode": "multi", "sort": "desc" } } } diff --git a/hub/deploy_agent.sh b/hub/deploy_agent.sh index a688895..e0286ff 100755 --- a/hub/deploy_agent.sh +++ b/hub/deploy_agent.sh @@ -19,6 +19,7 @@ HOST="${2:?uso: deploy_agent.sh [arch]}" ARCH="${3:-amd64}" HUB="https://metrics-dxaqj3ina6eqd5pjt85wkrrj.organic-machine.com/api/v1/import/prometheus" +LOKI="https://logs-wmaxecsjcfnocz81d5luca92.organic-machine.com/loki/api/v1/push" PW="$(pass show fleet/ingest-pass | head -1)" BIN="$(cd "$(dirname "$0")/.." 
&& pwd)/apps/metrics_agent/dist/metrics_agent_${ARCH}" @@ -28,7 +29,7 @@ echo ">> copiando binario a $HOST" scp -q -o BatchMode=yes "$BIN" "$HOST:/tmp/metrics_agent" echo ">> instalando servicio en $NODE ($HOST)" -ssh -o BatchMode=yes "$HOST" "NODE='$NODE' PW='$PW' HUB='$HUB' bash -s" <<'OUTER' +ssh -o BatchMode=yes "$HOST" "NODE='$NODE' PW='$PW' HUB='$HUB' LOKI='$LOKI' bash -s" <<'OUTER' set -e sudo -n mkdir -p /opt/fleet-agent /etc/fleet-agent sudo -n mv /tmp/metrics_agent /opt/fleet-agent/metrics_agent @@ -37,6 +38,7 @@ sudo -n tee /etc/fleet-agent/agent.json >/dev/null </dev/null 2>&1 +sudo -n systemctl restart fleet-agent sleep 3 echo -n "status: "; systemctl is-active fleet-agent OUTER diff --git a/hub/grafana-provisioning/loki-datasource.yaml b/hub/grafana-provisioning/loki-datasource.yaml new file mode 100644 index 0000000..368e4c4 --- /dev/null +++ b/hub/grafana-provisioning/loki-datasource.yaml @@ -0,0 +1,10 @@ +apiVersion: 1 + +datasources: + - name: Loki + uid: loki + type: loki + access: proxy + url: http://127.0.0.1:3100 + jsonData: + maxLines: 1000 diff --git a/hub/loki-config.yaml b/hub/loki-config.yaml new file mode 100644 index 0000000..2c28e7e --- /dev/null +++ b/hub/loki-config.yaml @@ -0,0 +1,42 @@ +# Loki single-binary config for the fleet_monitoring hub (magnus). +# Filesystem storage, 31-day retention, HTTP API listens on localhost only (Caddy exposes the logs- vhost with auth). 
+auth_enabled: false + +server: + http_listen_address: 127.0.0.1 + http_listen_port: 3100 + grpc_listen_port: 9095 + log_level: warn + +common: + instance_addr: 127.0.0.1 + path_prefix: /var/lib/loki + storage: + filesystem: + chunks_directory: /var/lib/loki/chunks + rules_directory: /var/lib/loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +limits_config: + retention_period: 744h + reject_old_samples: false + allow_structured_metadata: true + volume_enabled: true + +compactor: + working_directory: /var/lib/loki/compactor + retention_enabled: true + delete_request_store: filesystem