feat: media_analytics — ETL PC+VPS → ClickHouse + Grafana

2 ETLs cada 5min suben snapshots (Jellyfin, *arr, Prowlarr, gnula,
popelis users/mylist/events) a ClickHouse en el VPS, visualizado en
Grafana (grafana.datardos.com). Ingesta PC via tunel SSH; popelis
via ETL local en el VPS. Usa clickhouse_insert_rows_py_infra.
This commit is contained in:
Egutierrez
2026-05-30 14:55:48 +02:00
commit 52999ecb86
9 changed files with 853 additions and 0 deletions
+173
View File
@@ -0,0 +1,173 @@
-- Analytical schema for the media stack. Every snapshot table carries snapshot_ts
-- (the moment the ETL captured the data, every 5 min), which allows time-based analysis of state.
-- Event tables carry event_ts (the actual instant of the event).
-- Snapshot tables use the MergeTree engine, monthly partitions, ordered by (snapshot_ts, key).
CREATE DATABASE IF NOT EXISTS analytics;
-- ============ JELLYFIN ============
-- Catalog: movies, series and episodes visible in the library.
CREATE TABLE IF NOT EXISTS analytics.jellyfin_items
(
    snapshot_ts       DateTime,                        -- ETL capture time
    item_id           String,
    type              LowCardinality(String),          -- Movie | Series | Episode
    name              String,
    production_year   Int32,
    runtime_min       Float32,
    genres            Array(String),
    community_rating  Float32,
    official_rating   String,
    series_name       String,
    library           String,
    path              String,
    date_created      DateTime DEFAULT toDateTime(0)   -- epoch 0 when omitted on insert
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(snapshot_ts)                     -- one partition per month
ORDER BY (snapshot_ts, type, item_id);
-- Jellyfin users (popelis mirrors).
CREATE TABLE IF NOT EXISTS analytics.jellyfin_users
(
    snapshot_ts    DateTime,                        -- ETL capture time
    user_id        String,
    name           String,
    last_login     DateTime DEFAULT toDateTime(0),  -- epoch 0 when omitted on insert
    last_activity  DateTime DEFAULT toDateTime(0),  -- epoch 0 when omitted on insert
    is_admin       UInt8
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(snapshot_ts)                  -- one partition per month
ORDER BY (snapshot_ts, user_id);
-- Playback state per user + item (play count, watched flag, last time played).
CREATE TABLE IF NOT EXISTS analytics.jellyfin_user_items
(
    snapshot_ts   DateTime,                        -- ETL capture time
    user_id       String,
    user_name     String,
    item_id       String,
    item_name     String,
    type          LowCardinality(String),
    played        UInt8,
    play_count    Int32,
    playback_pct  Float32,
    last_played   DateTime DEFAULT toDateTime(0)   -- epoch 0 when omitted on insert
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(snapshot_ts)                 -- one partition per month
ORDER BY (snapshot_ts, user_id, item_id);
-- Active sessions (what is being watched at the moment of the snapshot).
-- NOTE(review): a snapshot with no active sessions presumably inserts no rows, so
-- max(snapshot_ts) on this table is not necessarily the latest ETL run -- confirm against the ETL.
CREATE TABLE IF NOT EXISTS analytics.jellyfin_sessions
(
    snapshot_ts   DateTime,                 -- ETL capture time
    user_name     String,
    item_name     String,
    item_type     LowCardinality(String),
    client        String,
    device        String,
    play_method   String,
    is_paused     UInt8,
    position_pct  Float32
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(snapshot_ts)          -- one partition per month
ORDER BY (snapshot_ts, user_name);
-- ============ TORRENT SCRAPERS (*arr) ============
-- Radarr/Sonarr history: grabs, imports, failures.
CREATE TABLE IF NOT EXISTS analytics.arr_history
(
    snapshot_ts      DateTime,                        -- ETL capture time
    app              LowCardinality(String),          -- radarr | sonarr
    history_id       Int64,
    event_type       LowCardinality(String),          -- grabbed | downloadFolderImported | ...
    title            String,
    source_title     String,
    indexer          String,
    download_client  String,
    quality          String,
    languages        Array(String),
    event_date       DateTime DEFAULT toDateTime(0)   -- epoch 0 when omitted on insert
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(snapshot_ts)                    -- one partition per month
ORDER BY (snapshot_ts, app, history_id);
-- Active download queue.
CREATE TABLE IF NOT EXISTS analytics.arr_queue
(
    snapshot_ts      DateTime,                 -- ETL capture time
    app              LowCardinality(String),
    title            String,
    status           String,
    tracked_status   String,
    size_bytes       Int64,
    sizeleft_bytes   Int64,
    timeleft         String,
    indexer          String,
    download_client  String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(snapshot_ts)             -- one partition per month
ORDER BY (snapshot_ts, app, title);
-- Prowlarr indexers: state plus grab/query counters.
CREATE TABLE IF NOT EXISTS analytics.prowlarr_indexers
(
    snapshot_ts     DateTime,   -- ETL capture time
    indexer_id      Int32,
    name            String,
    enable          UInt8,
    protocol        String,
    privacy         String,
    num_grabs       Int64,
    num_queries     Int64,
    num_grab_fail   Int64,
    num_query_fail  Int64
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(snapshot_ts)   -- one partition per month
ORDER BY (snapshot_ts, indexer_id);
-- ============ GNULA SCRAPER ============
-- Catalog of detected Spanish-language movies (gnula_catalog.db).
-- NOTE(review): detected_at / downloaded_at are stored as String, not DateTime;
-- presumably they are copied verbatim from the source DB -- confirm before doing date math on them.
CREATE TABLE IF NOT EXISTS analytics.gnula_movies
(
    snapshot_ts    DateTime,                 -- ETL capture time
    href           String,
    title          String,
    year           Int32,
    flags          String,
    lang_es        UInt8,
    status         LowCardinality(String),   -- pending | downloaded | failed | have
    in_library     UInt8,
    detected_at    String,
    downloaded_at  String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(snapshot_ts)           -- one partition per month
ORDER BY (snapshot_ts, href);
-- ============ POPELIS ============
-- Users (state).
CREATE TABLE IF NOT EXISTS analytics.popelis_users
(
    snapshot_ts  DateTime,                        -- ETL capture time
    user_id      Int64,
    username     String,
    jf_user_id   String,
    created_at   DateTime DEFAULT toDateTime(0)   -- epoch 0 when omitted on insert
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(snapshot_ts)                -- one partition per month
ORDER BY (snapshot_ts, user_id);
-- "My list" per user (state).
CREATE TABLE IF NOT EXISTS analytics.popelis_mylist
(
    snapshot_ts  DateTime,                        -- ETL capture time
    user_id      Int64,
    item_id      String,
    added_at     DateTime DEFAULT toDateTime(0)   -- epoch 0 when omitted on insert
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(snapshot_ts)                -- one partition per month
ORDER BY (snapshot_ts, user_id, item_id);
-- Events (logins, plays, mylist add/remove), instrumented in popelis-api.
-- Fact table: deduplicated by event_id through ReplacingMergeTree, which keeps the row
-- with the highest ingested_at for each sorting-key value. Duplicates are only removed
-- during background merges, so readers that need exact results must query with FINAL.
CREATE TABLE IF NOT EXISTS analytics.popelis_events
(
    event_id     Int64,
    event_ts     DateTime,                 -- actual instant of the event
    user_id      Int64,
    username     String,
    event_type   LowCardinality(String),   -- login | logout | play | mylist_add | mylist_remove
    item_id      String,
    meta         String,
    ingested_at  DateTime DEFAULT now()    -- version column: the latest ingestion wins
)
ENGINE = ReplacingMergeTree(ingested_at)
PARTITION BY toYYYYMM(event_ts)            -- one partition per month of event time
ORDER BY event_id;
+60
View File
@@ -0,0 +1,60 @@
# ClickHouse + Grafana stack for media analytics.
# CH_PASSWORD and GF_PASSWORD must be provided by the environment / .env file.
services:
  clickhouse:
    image: clickhouse/clickhouse-server:24.8-alpine
    container_name: clickhouse
    restart: always
    environment:
      CLICKHOUSE_DB: analytics
      CLICKHOUSE_USER: analytics
      # Fail fast instead of silently starting with an empty password when the
      # variable is missing (plain ${VAR} interpolates to an empty string).
      CLICKHOUSE_PASSWORD: "${CH_PASSWORD:?CH_PASSWORD must be set}"
      # NOTE(review): this lets the analytics user manage users/grants via SQL;
      # confirm the ETL and Grafana really need that level of privilege.
      CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: "1"
    ulimits:
      nofile:
        soft: 262144
        hard: 262144
    volumes:
      - clickhouse_data:/var/lib/clickhouse
      - ./clickhouse/schema.sql:/docker-entrypoint-initdb.d/01_schema.sql:ro
    networks:
      - coolify
    ports:
      # HTTP only on the VPS loopback (not public). The PC ingests through an SSH tunnel.
      # Grafana uses the native port 9000 over the coolify network (not published).
      - "127.0.0.1:8123:8123"
    deploy:
      resources:
        limits:
          memory: 2g
  grafana:
    image: grafana/grafana:11.2.0
    container_name: grafana
    restart: always
    environment:
      GF_SECURITY_ADMIN_USER: admin
      # Same fail-fast rule: never boot a public Grafana with an empty admin password.
      GF_SECURITY_ADMIN_PASSWORD: "${GF_PASSWORD:?GF_PASSWORD must be set}"
      GF_INSTALL_PLUGINS: grafana-clickhouse-datasource
      GF_SERVER_ROOT_URL: https://grafana.datardos.com
      GF_USERS_ALLOW_SIGN_UP: "false"
      # Exposed to Grafana so the provisioned datasource can expand ${CH_PASSWORD}.
      CH_PASSWORD: "${CH_PASSWORD:?CH_PASSWORD must be set}"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
    networks:
      - coolify
    labels:
      traefik.enable: "true"
      traefik.docker.network: coolify
      traefik.http.routers.grafana.entrypoints: https
      traefik.http.routers.grafana.rule: Host(`grafana.datardos.com`)
      traefik.http.routers.grafana.tls: "true"
      traefik.http.routers.grafana.tls.certresolver: letsencrypt
      traefik.http.services.grafana.loadbalancer.server.port: "3000"
volumes:
  clickhouse_data:
  grafana_data:
networks:
  coolify:
    external: true
@@ -0,0 +1,13 @@
# Grafana dashboard provider: loads dashboard JSON files from disk into the
# "Media Stack" folder and re-scans the directory every 30 seconds.
apiVersion: 1

providers:
  - name: media-stack
    orgId: 1
    folder: Media Stack
    type: file
    disableDeletion: false
    updateIntervalSeconds: 30
    allowUiUpdates: true
    options:
      path: /etc/grafana/provisioning/dashboards
      foldersFromFilesStructure: false
@@ -0,0 +1,88 @@
{
"uid": "media-stack",
"title": "Media Stack Analytics",
"tags": ["media", "popelis"],
"timezone": "browser",
"schemaVersion": 39,
"version": 1,
"refresh": "5m",
"time": { "from": "now-7d", "to": "now" },
"templating": { "list": [] },
"annotations": { "list": [] },
"panels": [
{
  "id": 1,
  "type": "stat",
  "title": "Jellyfin · items (último)",
  "gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 },
  "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
      "editorType": "sql",
      "queryType": "table",
      "format": 1,
      "rawSql": "SELECT count() AS items FROM analytics.jellyfin_items WHERE snapshot_ts = (SELECT max(snapshot_ts) FROM analytics.jellyfin_items)"
    }
  ]
},
{
  "id": 2,
  "type": "stat",
  "title": "Popelis · usuarios",
  "gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 },
  "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
      "editorType": "sql",
      "queryType": "table",
      "format": 1,
      "rawSql": "SELECT count() AS users FROM analytics.popelis_users WHERE snapshot_ts = (SELECT max(snapshot_ts) FROM analytics.popelis_users)"
    }
  ]
},
{
  "id": 3,
  "type": "stat",
  "title": "gnula · pendientes",
  "gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 },
  "fieldConfig": {
    "defaults": { "color": { "mode": "fixed", "fixedColor": "orange" } },
    "overrides": []
  },
  "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
      "editorType": "sql",
      "queryType": "table",
      "format": 1,
      "rawSql": "SELECT countIf(status='pending') AS pending FROM analytics.gnula_movies WHERE snapshot_ts = (SELECT max(snapshot_ts) FROM analytics.gnula_movies)"
    }
  ]
},
{
  "id": 4,
  "type": "stat",
  "title": "gnula · descargadas",
  "gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 },
  "fieldConfig": {
    "defaults": { "color": { "mode": "fixed", "fixedColor": "green" } },
    "overrides": []
  },
  "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
      "editorType": "sql",
      "queryType": "table",
      "format": 1,
      "rawSql": "SELECT countIf(status='downloaded') AS downloaded FROM analytics.gnula_movies WHERE snapshot_ts = (SELECT max(snapshot_ts) FROM analytics.gnula_movies)"
    }
  ]
},
{
  "id": 5,
  "type": "stat",
  "title": "*arr · grabs (total)",
  "gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 },
  "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
      "editorType": "sql",
      "queryType": "table",
      "format": 1,
      "rawSql": "SELECT countIf(event_type='grabbed') AS grabs FROM analytics.arr_history WHERE snapshot_ts = (SELECT max(snapshot_ts) FROM analytics.arr_history)"
    }
  ]
},
{
  "id": 6,
  "type": "stat",
  "title": "Jellyfin · sesiones activas (último)",
  "gridPos": { "h": 4, "w": 4, "x": 20, "y": 0 },
  "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
      "editorType": "sql",
      "queryType": "table",
      "format": 1,
      "rawSql": "SELECT count() AS sesiones FROM analytics.jellyfin_sessions WHERE snapshot_ts = (SELECT max(snapshot_ts) FROM analytics.jellyfin_sessions) AND snapshot_ts >= now() - INTERVAL 10 MINUTE"
    }
  ]
},
{
  "id": 10,
  "type": "timeseries",
  "title": "gnula · catálogo en el tiempo",
  "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
  "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
      "editorType": "sql",
      "queryType": "timeseries",
      "format": 0,
      "rawSql": "SELECT snapshot_ts AS time, countIf(status='pending') AS pendientes, countIf(status='downloaded') AS descargadas, countIf(in_library=1) AS en_biblioteca FROM analytics.gnula_movies WHERE $__timeFilter(snapshot_ts) GROUP BY time ORDER BY time"
    }
  ]
},
{
  "id": 11,
  "type": "timeseries",
  "title": "Jellyfin · tamaño biblioteca en el tiempo",
  "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
  "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
      "editorType": "sql",
      "queryType": "timeseries",
      "format": 0,
      "rawSql": "SELECT snapshot_ts AS time, countIf(type='Movie') AS peliculas, countIf(type='Series') AS series, countIf(type='Episode') AS episodios FROM analytics.jellyfin_items WHERE $__timeFilter(snapshot_ts) GROUP BY time ORDER BY time"
    }
  ]
},
{
  "id": 20,
  "type": "table",
  "title": "*arr · grabs recientes",
  "gridPos": { "h": 9, "w": 12, "x": 0, "y": 12 },
  "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
      "editorType": "sql",
      "queryType": "table",
      "format": 1,
      "rawSql": "SELECT event_date, app, title, indexer, quality, arrayStringConcat(languages, ',') AS idiomas FROM analytics.arr_history WHERE snapshot_ts = (SELECT max(snapshot_ts) FROM analytics.arr_history) AND event_type='grabbed' ORDER BY event_date DESC LIMIT 30"
    }
  ]
},
{
  "id": 21,
  "type": "table",
  "title": "Prowlarr · indexers",
  "gridPos": { "h": 9, "w": 12, "x": 12, "y": 12 },
  "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
      "editorType": "sql",
      "queryType": "table",
      "format": 1,
      "rawSql": "SELECT name, enable, protocol, num_grabs, num_queries, num_grab_fail, num_query_fail FROM analytics.prowlarr_indexers WHERE snapshot_ts = (SELECT max(snapshot_ts) FROM analytics.prowlarr_indexers) ORDER BY num_grabs DESC"
    }
  ]
},
{
  "id": 30,
  "type": "table",
  "title": "Popelis · eventos recientes",
  "gridPos": { "h": 9, "w": 12, "x": 0, "y": 21 },
  "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
      "editorType": "sql",
      "queryType": "table",
      "format": 1,
      "rawSql": "SELECT event_ts, username, event_type, item_id FROM analytics.popelis_events FINAL ORDER BY event_ts DESC LIMIT 50"
    }
  ]
},
{
  "id": 31,
  "type": "timeseries",
  "title": "Popelis · eventos por tipo (por día)",
  "gridPos": { "h": 9, "w": 12, "x": 12, "y": 21 },
  "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "grafana-clickhouse-datasource", "uid": "clickhouse" },
      "editorType": "sql",
      "queryType": "timeseries",
      "format": 0,
      "rawSql": "SELECT toStartOfDay(event_ts) AS time, countIf(event_type='login') AS logins, countIf(event_type='mylist_add') AS mylist_add, countIf(event_type='user_created') AS altas FROM analytics.popelis_events FINAL WHERE $__timeFilter(event_ts) GROUP BY time ORDER BY time"
    }
  ]
}
]
}
@@ -0,0 +1,16 @@
# Grafana datasource provisioning: default ClickHouse datasource, reached over the
# native protocol at clickhouse:9000. The password is expanded from the CH_PASSWORD
# environment variable of the Grafana process.
apiVersion: 1

datasources:
  - name: ClickHouse
    uid: clickhouse
    type: grafana-clickhouse-datasource
    access: proxy
    isDefault: true
    jsonData:
      host: clickhouse
      port: 9000
      protocol: native
      username: analytics
      defaultDatabase: analytics
    secureJsonData:
      password: ${CH_PASSWORD}