diff options
Diffstat (limited to 'modules/nixos/monitoring/rules/redis.yaml')
-rw-r--r-- | modules/nixos/monitoring/rules/redis.yaml | 189 |
1 files changed, 189 insertions, 0 deletions
diff --git a/modules/nixos/monitoring/rules/redis.yaml b/modules/nixos/monitoring/rules/redis.yaml new file mode 100644 index 0000000..f6d1fe1 --- /dev/null +++ b/modules/nixos/monitoring/rules/redis.yaml @@ -0,0 +1,189 @@ +--- +groups: + - name: Redis + + rules: + - alert: RedisDown + expr: >- + redis_up == 0 + for: 0m + labels: + severity: critical + annotations: + summary: Redis down (instance {{ $labels.instance }}) + description: |- + Redis instance is down. + + VALUE = {{ $value }} + LABELS = {{ $labels }} + + - alert: RedisMissingMaster + expr: >- + (count(redis_instance_info{role="master"}) or vector(0)) + < 1 + for: 0m + labels: + severity: critical + annotations: + summary: Redis missing master (instance {{ $labels.instance }}) + description: |- + Redis cluster has no node marked as master. + + VALUE = {{ $value }} + LABELS = {{ $labels }} + + - alert: RedisTooManyMasters + expr: >- + count(redis_instance_info{role="master"}) > 1 + for: 0m + labels: + severity: critical + annotations: + summary: Redis too many masters (instance {{ $labels.instance }}) + description: |- + Redis cluster has too many nodes marked as master. + + VALUE = {{ $value }} + LABELS = {{ $labels }} + + - alert: RedisDisconnectedSlaves + expr: >- + count without (instance, job) (redis_connected_slaves) + - sum without (instance, job) (redis_connected_slaves) + - 1 + > 1 + for: 0m + labels: + severity: critical + annotations: + summary: Redis disconnected slaves (instance {{ $labels.instance }}) + description: |- + Redis is not replicating for all slaves. + + VALUE = {{ $value }} + LABELS = {{ $labels }} + + - alert: RedisReplicationBroken + expr: >- + delta(redis_connected_slaves[1m]) < 0 + for: 0m + labels: + severity: critical + annotations: + summary: Redis replication broken (instance {{ $labels.instance }}) + description: |- + Redis instance lost a slave. + + VALUE = {{ $value }} + LABELS = {{ $labels }} + + - alert: RedisClusterFlapping + expr: >- + changes(redis_connected_slaves[1m]) > 1 + for: 2m + labels: + severity: critical + annotations: + summary: Redis cluster flapping (instance {{ $labels.instance }}) + description: |- + Changes have been detected in the Redis replica connection. This can + occur when replica nodes lose connection to the master and reconnect + (a.k.a flapping). + + VALUE = {{ $value }} + LABELS = {{ $labels }} + + - alert: RedisMissingBackup + expr: >- + time() - redis_rdb_last_save_timestamp_seconds + > 60 * 60 * 24 + for: 0m + labels: + severity: critical + annotations: + summary: Redis missing backup (instance {{ $labels.instance }}) + description: |- + Redis has not been backed up for 24 hours. + + VALUE = {{ $value }} + LABELS = {{ $labels }} + + - alert: RedisOutOfSystemMemory + expr: >- + redis_memory_used_bytes + / + redis_total_system_memory_bytes * 100 + > 90 + for: 2m + labels: + severity: warning + annotations: + summary: Redis out of system memory (instance {{ $labels.instance }}) + description: |- + Redis is running out of system memory (> 90%). + + VALUE = {{ $value }} + LABELS = {{ $labels }} + + - alert: RedisOutOfConfiguredMaxmemory + expr: >- + redis_memory_max_bytes != 0 + and + ( + redis_memory_used_bytes + / + redis_memory_max_bytes * 100 + > 90 + ) + for: 2m + labels: + severity: warning + annotations: + summary: Redis out of configured maxmemory (instance {{ $labels.instance }}) + description: |- + Redis is running out of configured maxmemory (> 90%). + + VALUE = {{ $value }} + LABELS = {{ $labels }} + + - alert: RedisTooManyConnections + expr: >- + redis_connected_clients > 100 + for: 2m + labels: + severity: warning + annotations: + summary: Redis too many connections (instance {{ $labels.instance }}) + description: |- + Redis instance has too many connections. + + VALUE = {{ $value }} + LABELS = {{ $labels }} + + - alert: RedisNotEnoughConnections + expr: >- + redis_connected_clients < 1 + for: 2m + labels: + severity: warning + annotations: + summary: Redis not enough connections (instance {{ $labels.instance }}) + description: |- + Redis instance should have more connections (> 1). + + VALUE = {{ $value }} + LABELS = {{ $labels }} + + - alert: RedisRejectedConnections + expr: >- + increase(redis_rejected_connections_total[1m]) > 0 + for: 0m + labels: + severity: critical + annotations: + summary: Redis rejected connections (instance {{ $labels.instance }}) + description: |- + Some connections to Redis have been rejected. + + VALUE = {{ $value }} + LABELS = {{ $labels }} |