about summary refs log tree commit diff
path: root/modules/nixos/monitoring/rules/redis.yaml
diff options
context:
space:
mode:
author Azat Bahawi <azat@bahawi.net> 2023-03-23 16:59:15 +0300
committer Azat Bahawi <azat@bahawi.net> 2023-03-23 16:59:15 +0300
commit f6e36a699ae2d073e340e0a5844b8e111ed3de37 (patch)
tree 52bb1d04e7b027165245b55622859b8425105fc7 /modules/nixos/monitoring/rules/redis.yaml
parent 2023-03-22 (diff)
2023-03-23
Diffstat (limited to '')
-rw-r--r-- modules/nixos/monitoring/rules/redis.yaml 189
1 files changed, 189 insertions, 0 deletions
diff --git a/modules/nixos/monitoring/rules/redis.yaml b/modules/nixos/monitoring/rules/redis.yaml
new file mode 100644
index 0000000..f6d1fe1
--- /dev/null
+++ b/modules/nixos/monitoring/rules/redis.yaml
@@ -0,0 +1,189 @@
+---
+groups:
+  - name: Redis
+
+    rules:
+      # Critical; fires immediately (for: 0m) once redis_up reports 0.
+      - alert: RedisDown
+        expr: redis_up == 0
+        for: 0m
+        labels:
+          severity: critical
+        annotations:
+          summary: Redis down (instance {{ $labels.instance }})
+          description: |-
+            Redis instance is down.
+
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+
+      # The aggregated result has no labels, so the summary names no instance.
+      - alert: RedisMissingMaster
+        expr: >-
+          (count(redis_instance_info{role="master"}) or vector(0)) < 1
+        for: 0m
+        labels:
+          severity: critical
+        annotations:
+          summary: Redis missing master
+          description: |-
+            Redis cluster has no node marked as master.
+
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+
+      # count() drops every label, so the summary cannot name an instance.
+      - alert: RedisTooManyMasters
+        expr: 'count(redis_instance_info{role="master"}) > 1'
+        for: 0m
+        labels:
+          severity: critical
+        annotations:
+          summary: Redis too many masters
+          description: |-
+            Redis cluster has too many nodes marked as master.
+
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+
+      # Instances - connected replicas - 1 (assumed single master); > 0 alerts.
+      - alert: RedisDisconnectedSlaves
+        expr: >-
+          count without (instance, job) (redis_connected_slaves)
+          - sum without (instance, job) (redis_connected_slaves)
+          - 1 > 0
+        for: 0m
+        labels:
+          severity: critical
+        annotations:
+          summary: Redis disconnected slaves
+          description: |-
+            Redis is not replicating for all slaves.
+
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+
+      # Critical with no hold time: any drop in connected replicas over 1m.
+      - alert: RedisReplicationBroken
+        expr: delta(redis_connected_slaves[1m]) < 0
+        for: 0m
+        labels:
+          severity: critical
+        annotations:
+          summary: Redis replication broken (instance {{ $labels.instance }})
+          description: |-
+            Redis instance lost a slave.
+
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+
+      # More than one change in the replica count within 1m, held for 2m.
+      - alert: RedisClusterFlapping
+        expr: changes(redis_connected_slaves[1m]) > 1
+        for: 2m
+        labels:
+          severity: critical
+        annotations:
+          summary: Redis cluster flapping (instance {{ $labels.instance }})
+          description: |-
+            Changes have been detected in the Redis replica connection. This can
+            occur when replica nodes lose connection to the master and reconnect
+            (a.k.a flapping).
+
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+
+      # 60 * 60 * 24 seconds = 24 hours since the last recorded RDB save.
+      - alert: RedisMissingBackup
+        expr: >-
+          time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 24
+        for: 0m
+        labels:
+          severity: critical
+        annotations:
+          summary: Redis missing backup (instance {{ $labels.instance }})
+          description: |-
+            Redis has not been backed up for 24 hours.
+
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+
+      # Used memory as a percentage of total system memory; warns once it
+      # has stayed above 90 for 2 minutes.
+      - alert: RedisOutOfSystemMemory
+        expr: >-
+          redis_memory_used_bytes / redis_total_system_memory_bytes * 100
+          > 90
+        for: 2m
+        labels:
+          severity: warning
+        annotations:
+          summary: Redis out of system memory (instance {{ $labels.instance }})
+          description: |-
+            Redis is running out of system memory (> 90%).
+
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+
+      # Ratio is the left operand of `and`, so $value reports the percentage;
+      # the right operand only drops series whose redis_memory_max_bytes is 0.
+      - alert: RedisOutOfConfiguredMaxmemory
+        expr: >-
+          (
+            redis_memory_used_bytes / redis_memory_max_bytes * 100
+            > 90
+          )
+          and
+          redis_memory_max_bytes != 0
+        for: 2m
+        labels:
+          severity: warning
+        annotations:
+          summary: Redis out of configured maxmemory (instance {{ $labels.instance }})
+          description: |-
+            Redis is running out of configured maxmemory (> 90%).
+
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+
+      # Warns once an instance has held more than 100 clients for 2 minutes.
+      - alert: RedisTooManyConnections
+        expr: redis_connected_clients > 100
+        for: 2m
+        labels:
+          severity: warning
+        annotations:
+          summary: Redis too many connections (instance {{ $labels.instance }})
+          description: |-
+            Redis instance has too many connections.
+
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+
+      # Fires only at zero clients, so the required minimum is one connection.
+      - alert: RedisNotEnoughConnections
+        expr: redis_connected_clients < 1
+        for: 2m
+        labels:
+          severity: warning
+        annotations:
+          summary: Redis not enough connections (instance {{ $labels.instance }})
+          description: |-
+            Redis instance should have more connections (>= 1).
+
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+
+      # Critical with no hold time: any rejected connection in the last 1m.
+      - alert: RedisRejectedConnections
+        expr: increase(redis_rejected_connections_total[1m]) > 0
+        for: 0m
+        labels:
+          severity: critical
+        annotations:
+          summary: Redis rejected connections (instance {{ $labels.instance }})
+          description: |-
+            Some connections to Redis have been rejected.
+
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}

Consider giving Nix/NixOS a try! <3