Add initial project structure and configuration for monitoring suite

- Create .gitignore to exclude Python-generated files and virtual environments
- Add .python-version for Python version management
- Initialize README.md with project description and usage instructions
- Implement alloy.river configuration for Grafana Alloy observability
- Set up grafana.ini for Grafana configuration
- Configure dashboards.yml for automatic dashboard loading in Grafana
- Define datasources.yml for connecting Grafana to Prometheus, Loki, and Tempo
- Establish loki.yaml configuration for Loki logging
- Set up prometheus.yml for Prometheus metrics collection
- Configure tempo.yaml for Tempo tracing
- Create docker-compose.yml for orchestrating services
- Develop init.sh script for initializing project directories and services
- Implement main.py as the entry point for the application
- Define pyproject.toml for project metadata and dependencies
- Update uv.lock for dependency management
This commit is contained in:
2025-10-06 00:52:31 +02:00
commit 9966851e75
15 changed files with 996 additions and 0 deletions
+207
View File
@@ -0,0 +1,207 @@
//=============================================================================
// GRAFANA ALLOY - PUERTA DE ENTRADA ÚNICA DE OBSERVABILIDAD
//=============================================================================
//=============================================================================
// PROMETHEUS - MÉTRICAS
//=============================================================================
// Self-scrape: collect Alloy's own metrics from its local HTTP endpoint
// and hand them to the shared remote_write component.
prometheus.scrape "alloy" {
  job_name        = "alloy"
  metrics_path    = "/metrics"
  scrape_interval = "15s"

  targets = [
    {"__address__" = "localhost:12345"},
  ]

  forward_to = [prometheus.remote_write.prometheus.receiver]
}
// Scrape the Prometheus server's own metrics endpoint.
prometheus.scrape "prometheus" {
  job_name        = "prometheus"
  metrics_path    = "/metrics"
  scrape_interval = "15s"

  targets = [
    {"__address__" = "prometheus:9090"},
  ]

  forward_to = [prometheus.remote_write.prometheus.receiver]
}
// Scrape Grafana's metrics endpoint (every 30s, unlike the 15s used by the
// other scrape jobs in this file).
prometheus.scrape "grafana" {
  job_name        = "grafana"
  metrics_path    = "/metrics"
  scrape_interval = "30s"

  targets = [
    {"__address__" = "grafana:3000"},
  ]

  forward_to = [prometheus.remote_write.prometheus.receiver]
}
// Scrape Loki's metrics endpoint.
prometheus.scrape "loki" {
  job_name        = "loki"
  metrics_path    = "/metrics"
  scrape_interval = "15s"

  targets = [
    {"__address__" = "loki:3100"},
  ]

  forward_to = [prometheus.remote_write.prometheus.receiver]
}
// Scrape Tempo's metrics endpoint on its HTTP port.
prometheus.scrape "tempo" {
  job_name        = "tempo"
  metrics_path    = "/metrics"
  scrape_interval = "15s"

  targets = [
    {"__address__" = "tempo:3200"},
  ]

  forward_to = [prometheus.remote_write.prometheus.receiver]
}
// Push receiver for external applications: accepts Prometheus remote_write
// requests on all interfaces, port 9999, and forwards the samples to the
// shared remote_write component.
prometheus.receive_http "external_metrics" {
  forward_to = [prometheus.remote_write.prometheus.receiver]

  http {
    listen_port    = 9999
    listen_address = "0.0.0.0"
  }
}
// Single metrics sink: every scrape job and the HTTP receiver above forward
// here, and samples are pushed to Prometheus' remote-write endpoint.
// NOTE(review): Prometheus only accepts writes on this path when its
// remote-write receiver is enabled — confirm the server is started that way.
prometheus.remote_write "prometheus" {
  endpoint {
    url = "http://prometheus:9090/api/v1/write"
  }
}
//=============================================================================
// LOKI - LOGS
//=============================================================================
// Discover running Docker containers through the local Docker socket.
discovery.docker "docker_logs" {
  host             = "unix:///var/run/docker.sock"
  refresh_interval = "5s"
}

// Relabel rules for Docker container logs.
//
// This component is used only as a rule holder: its `rules` export is passed
// to loki.source.docker via `relabel_rules`. The rules must run inside the
// source because loki.source.docker drops every label starting with "__"
// before forwarding entries, so a downstream loki.relabel stage would never
// see the __meta_docker_* labels.
loki.relabel "docker" {
  // No entries are routed through this component; it only exports rules.
  forward_to = []

  // Docker reports container names with a leading "/" (e.g. "/grafana");
  // capture everything after it so the label value is just the name.
  rule {
    source_labels = ["__meta_docker_container_name"]
    regex         = "/(.*)"
    target_label  = "container"
  }

  // NOTE(review): __meta_docker_container_image is not among the labels
  // documented for discovery.docker; if it is absent this rule is a no-op.
  // Confirm, or switch to a __meta_docker_container_label_* source.
  rule {
    source_labels = ["__meta_docker_container_image"]
    target_label  = "image"
  }
}

// Tail the logs of every discovered container and ship them to Loki.
loki.source.docker "containers" {
  host             = "unix:///var/run/docker.sock"
  targets          = discovery.docker.docker_logs.targets
  refresh_interval = "5s"
  relabel_rules    = loki.relabel.docker.rules
  forward_to       = [loki.write.loki.receiver]
}
// HTTP push receiver for logs sent by external applications; listens on all
// interfaces, port 3101, and forwards entries straight to the Loki writer.
loki.source.api "external_logs" {
  forward_to = [loki.write.loki.receiver]

  http {
    listen_port    = 3101
    listen_address = "0.0.0.0"
  }
}
// Relabel rules for syslog entries.
//
// Used only as a rule holder: loki.source.syslog removes all labels starting
// with "__" before forwarding, so __syslog_message_hostname has to be copied
// to a regular label inside the source via `relabel_rules`, not in a
// downstream relabel stage.
loki.relabel "syslog" {
  // No entries are routed through this component; it only exports rules.
  forward_to = []

  rule {
    source_labels = ["__syslog_message_hostname"]
    target_label  = "hostname"
  }
}

// Syslog receiver for system logs (TCP, port 1514 on all interfaces).
loki.source.syslog "system_logs" {
  listener {
    address  = "0.0.0.0:1514"
    protocol = "tcp"
  }

  relabel_rules = loki.relabel.syslog.rules
  forward_to    = [loki.write.loki.receiver]
}
// Final destination for every log pipeline in this file: push entries to
// Loki's HTTP push API.
loki.write "loki" {
  endpoint {
    url = "http://loki:3100/loki/api/v1/push"
  }
}
//=============================================================================
// TEMPO - TRAZAS
//=============================================================================
// OTLP trace receiver: HTTP on 4318 and gRPC on 4317, both on all
// interfaces. Only traces are routed onward (to the batch processor).
otelcol.receiver.otlp "tempo" {
  http {
    endpoint = "0.0.0.0:4318"
  }

  grpc {
    endpoint = "0.0.0.0:4317"
  }

  output {
    traces = [otelcol.processor.batch.tempo.input]
  }
}
// Jaeger trace receiver. Three Jaeger protocols are enabled, each on its own
// port; received spans go to the shared batch processor.
otelcol.receiver.jaeger "jaeger_traces" {
  protocols {
    thrift_compact {
      endpoint = "0.0.0.0:6831"
    }

    thrift_http {
      endpoint = "0.0.0.0:14268"
    }

    grpc {
      endpoint = "0.0.0.0:14250"
    }
  }

  output {
    traces = [otelcol.processor.batch.tempo.input]
  }
}
// Zipkin trace receiver on port 9411 (all interfaces); spans are handed to
// the shared batch processor.
otelcol.receiver.zipkin "zipkin_traces" {
  endpoint = "0.0.0.0:9411"

  output {
    traces = [otelcol.processor.batch.tempo.input]
  }
}
// Batch processor shared by all trace receivers: groups spans using a batch
// size of 1024 and a 1s timeout before passing them to the Tempo exporter.
otelcol.processor.batch "tempo" {
  timeout         = "1s"
  send_batch_size = 1024

  output {
    traces = [otelcol.exporter.otlp.tempo.input]
  }
}
// OTLP exporter that delivers batched traces to Tempo on port 4317.
// Transport security is disabled (insecure = true), i.e. plaintext inside
// the compose network.
otelcol.exporter.otlp "tempo" {
  client {
    endpoint = "http://tempo:4317"

    tls {
      insecure = true
    }
  }
}
//=============================================================================
// CONFIGURACIÓN GENERAL
//=============================================================================
// Alloy's own log output: logfmt-formatted, "info" level.
logging {
  format = "logfmt"
  level  = "info"
}
+72
View File
@@ -0,0 +1,72 @@
# Configuración principal de Grafana
# Puedes editar este archivo para personalizar Grafana
[server]
# Public domain name of this Grafana instance
domain = localhost
# Port the HTTP server listens on
http_port = 3000
# Full public URL used when Grafana builds links and redirects
root_url = http://localhost:3000/
[database]
# Database backend: sqlite3, mysql or postgres
type = sqlite3
# Location of the SQLite database file (only used when type = sqlite3)
path = /var/lib/grafana/grafana.db
[security]
# Default administrator login name
admin_user = admin
# Default administrator password.
# NOTE(review): weak credential committed in plain text — override it per
# environment (e.g. via GF_SECURITY_ADMIN_PASSWORD) before exposing Grafana.
admin_password = admin123
# Secret key used for signing.
# NOTE(review): this is a placeholder value; replace it with a random secret
# (e.g. via GF_SECURITY_SECRET_KEY) and keep it out of version control.
secret_key = your_secret_key_here
[users]
# Self-service sign-up is turned off
allow_sign_up = false
# Regular users may not create organizations
allow_org_create = false
# New users are automatically added to an organization...
auto_assign_org = true
# ...with this role
auto_assign_org_role = Viewer
[auth.anonymous]
# Anonymous (not logged in) access is disabled
enabled = false
# Grafana reads log settings from [log] and the per-sink [log.<mode>]
# sections; a section named [logging] is not recognized and is ignored.
[log]
# Log level (debug, info, warn, error, critical)
level = info

[log.console]
# Console log line format (text, console, json)
format = console
[metrics]
# Expose Grafana's internal metrics
enabled = true
# Interval, in seconds, at which internal metrics are collected
interval_seconds = 10
[unified_alerting]
# Turn on the unified alerting system
enabled = true
[explore]
# Turn on the Explore view
enabled = true
[feature_toggles]
# Experimental features to switch on (space-delimited list of toggle names).
# NOTE(review): toggle names change between Grafana releases — confirm that
# newPanelChromeUI still exists in the version being deployed.
enable = newPanelChromeUI
[panels]
# Keep HTML sanitization in text panels switched on (false = do NOT disable it)
disable_sanitize_html = false
[plugins]
# Comma-separated list of plugin IDs that may load without a valid signature.
# This is a list, not a boolean: leave it empty to allow none (writing "false"
# would allow-list a plugin whose ID is literally "false").
allow_loading_unsigned_plugins =
# Do not enable alpha-stage plugins
enable_alpha = false
@@ -0,0 +1,29 @@
# Dashboard provisioning for Grafana.
# Edit this file to control which dashboards are loaded automatically.
apiVersion: 1

providers:
  # Single file provider for the whole dashboards tree.
  #
  # The file provider scans its path recursively, so declaring one provider
  # for /var/lib/grafana/dashboards and a second one for the nested
  # /var/lib/grafana/dashboards/system makes every dashboard under system/
  # get provisioned twice (duplicate UID/title warnings). Instead, one
  # provider mirrors the directory layout: files at the root land in the
  # default (General) folder and files under system/ land in a Grafana
  # folder named after that directory.
  - name: 'default'
    orgId: 1
    type: file
    disableDeletion: false
    editable: true
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /var/lib/grafana/dashboards
      # Create Grafana folders from sub-directories (requires `folder` to be unset).
      foldersFromFilesStructure: true
@@ -0,0 +1,94 @@
apiVersion: 1

# Automatic datasource provisioning for Grafana.
# Grafana connects directly to Prometheus, Loki and Tempo.
# "$$" is used wherever a literal "$" must survive Grafana's environment
# variable expansion of provisioning files.
datasources:
  # Prometheus: metrics
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    uid: prometheus
    isDefault: true
    editable: false
    jsonData:
      httpMethod: POST
      prometheusType: Prometheus
      prometheusVersion: 2.40.0
      cacheLevel: 'High'
      disableRecordingRules: false
      incrementalQueryOverlapWindow: 10m
      # Link exemplars carrying a TraceID label to the Tempo datasource.
      exemplarTraceIdDestinations:
        - name: TraceID
          datasourceUid: tempo
          urlDisplayLabel: "View Trace"

  # Loki: logs
  - name: Loki
    type: loki
    access: proxy
    url: http://loki:3100
    uid: loki
    editable: false
    jsonData:
      maxLines: 1000
      # Turn trace IDs found in log lines into links to Tempo.
      derivedFields:
        - datasourceUid: tempo
          matcherRegex: "(?:traceID|trace_id|traceid)=([A-Fa-f0-9]+)"
          name: TraceID
          url: "$${__value.raw}"
          urlDisplayLabel: "View Trace"

  # Tempo: traces
  - name: Tempo
    type: tempo
    access: proxy
    url: http://tempo:3200
    uid: tempo
    editable: false
    jsonData:
      httpMethod: GET
      tracesToLogs:
        datasourceUid: loki
        tags: ['job', 'instance', 'pod', 'namespace', 'container']
        mappedTags:
          - key: 'service.name'
            value: 'service'
          - key: 'container.name'
            value: 'container'
        mapTagNamesEnabled: true
        # The shifts are added to the span's start/end time to build the log
        # query window. The start shift must be negative so the window opens
        # BEFORE the span; a positive value would move the window past the
        # span and the linked query would miss the span's own logs.
        spanStartTimeShift: '-1h'
        spanEndTimeShift: '1h'
        filterByTraceID: true
        filterBySpanID: false
      tracesToMetrics:
        datasourceUid: prometheus
        tags:
          - key: 'service.name'
            value: 'service'
          - key: 'job'
        queries:
          - name: 'Request Rate'
            query: 'rate(traces_service_graph_request_total{$$__tags}[5m])'
          - name: 'Error Rate'
            query: 'rate(traces_service_graph_request_failed_total{$$__tags}[5m])'
          - name: 'Duration P99'
            query: 'histogram_quantile(0.99, rate(traces_spanmetrics_latency_bucket{$$__tags}[5m]))'
      serviceMap:
        datasourceUid: prometheus
      search:
        hide: false
      nodeGraph:
        enabled: true
      lokiSearch:
        datasourceUid: loki
+50
View File
@@ -0,0 +1,50 @@
# Single-binary Loki: no multi-tenancy, local filesystem storage, in-memory ring.
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

common:
  instance_addr: 127.0.0.1
  path_prefix: /loki
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100

schema_config:
  configs:
    - from: 2020-10-24
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

ruler:
  # NOTE(review): points at localhost:9093 — confirm an Alertmanager is
  # actually reachable there from the Loki container.
  alertmanager_url: http://localhost:9093

# By default Loki sends anonymous, but uniquely identifiable, usage and
# configuration analytics to Grafana Labs (https://stats.grafana.org/).
# For details on what is sent, see the buildReport method in
# https://github.com/grafana/loki/blob/main/pkg/usagestats/stats.go
#
# Reporting is explicitly disabled here:
analytics:
  reporting_enabled: false
+23
View File
@@ -0,0 +1,23 @@
# Prometheus configuration: storage/query backend only.
# Alloy is the single entry point for metrics and delivers them through
# remote_write, so this server scrapes nothing itself.
# NOTE(review): receiving remote_write requires Prometheus to be started with
# its remote-write receiver enabled — confirm the container's command line.
global:
  evaluation_interval: 15s
  scrape_interval: 15s
  external_labels:
    replica: 'prometheus-01'
    cluster: 'development'

# Alerting / recording rule files
rule_files:
  - "rules/*.yml"

# No direct scrape jobs: everything arrives from Alloy via remote_write
scrape_configs: []

# Alertmanager configuration (uncomment if an Alertmanager is deployed)
# alerting:
#   alertmanagers:
#     - static_configs:
#         - targets:
#             - alertmanager:9093
+46
View File
@@ -0,0 +1,46 @@
# Tempo (single binary) configuration: OTLP ingest, local trace storage.
server:
  http_listen_port: 3200
  grpc_listen_port: 9095

# Traces arrive over OTLP (gRPC 4317 / HTTP 4318).
distributor:
  receivers:
    otlp:
      protocols:
        grpc:
          endpoint: 0.0.0.0:4317
        http:
          endpoint: 0.0.0.0:4318

ingester:
  max_block_duration: 5m

compactor:
  compaction:
    # Blocks are kept for one hour only.
    block_retention: 1h

storage:
  trace:
    backend: local
    local:
      path: /tmp/tempo/traces
    wal:
      path: /tmp/tempo/wal
    pool:
      max_workers: 100
      queue_depth: 10000

query_frontend:
  search:
    duration_slo: 5s
    throughput_bytes_slo: 1.073741824e+09
  trace_by_id:
    duration_slo: 5s

# Span metrics / service graph metrics derived from ingested traces.
# These series (traces_spanmetrics_*, traces_service_graph_*) are NOT exposed
# on Tempo's /metrics endpoint, so scraping Tempo cannot collect them: the
# generator can only ship them with remote_write. To keep Alloy as the single
# entry point for metrics, they are written to Alloy's prometheus.receive_http
# listener (port 9999) instead of directly to Prometheus.
metrics_generator:
  registry:
    external_labels:
      source: tempo
      cluster: docker-compose
  storage:
    path: /tmp/tempo/generator/wal
    remote_write:
      # NOTE(review): assumes the Alloy container is reachable as "alloy" on
      # the compose network — confirm the service name.
      - url: http://alloy:9999/api/v1/metrics/write
        send_exemplars: true

# The generator produces nothing until processors are enabled per tenant.
overrides:
  defaults:
    metrics_generator:
      processors: [service-graphs, span-metrics]