about summary refs log tree commit diff
path: root/modules/nixos/monitoring/rules
diff options
context:
space:
mode:
author Azat Bahawi <azat@bahawi.net> 2023-04-14 02:51:09 +0300
committer Azat Bahawi <azat@bahawi.net> 2023-04-14 02:51:09 +0300
commit 39ed30937ec29217820583e07ff1f447d08b9898 (patch)
tree 96dc9d1a62e320c89510cd910add2ce8fb819850 /modules/nixos/monitoring/rules
parent 2023-04-12 (diff)
2023-04-14
Diffstat (limited to '')
-rw-r--r-- modules/nixos/monitoring/rules/node.yaml 19
-rw-r--r-- modules/nixos/monitoring/rules/redis.yaml 89
2 files changed, 1 insertions, 107 deletions
diff --git a/modules/nixos/monitoring/rules/node.yaml b/modules/nixos/monitoring/rules/node.yaml
index 98217b3..eee5939 100644
--- a/modules/nixos/monitoring/rules/node.yaml
+++ b/modules/nixos/monitoring/rules/node.yaml
@@ -238,28 +238,11 @@ groups:
             VALUE = {{ $value }}
             LABELS = {{ $labels }}
 
-      - alert: HostCpuStealNoisyNeighbor
-        expr: >-
-          avg by(instance) (rate(node_cpu_seconds_total{mode="steal"}[5m]))
-          * 100
-          > 15
-        for: 0m
-        labels:
-          severity: warning
-        annotations:
-          summary: Host CPU steal noisy neighbor at {{ $labels.instance }}.
-          description: |-
-            CPU steal is > 10%. A noisy neighbor is killing VM performances or a
-            spot instance may be out of credit.
-
-              VALUE = {{ $value }}
-              LABELS = {{ $labels }}
-
       - alert: HostCpuHighIowait
         expr: |-
           avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m]))
           * 100
-          > 15
+          > 50
         for: 0m
         labels:
           severity: warning
diff --git a/modules/nixos/monitoring/rules/redis.yaml b/modules/nixos/monitoring/rules/redis.yaml
index c07c819..b47c313 100644
--- a/modules/nixos/monitoring/rules/redis.yaml
+++ b/modules/nixos/monitoring/rules/redis.yaml
@@ -17,95 +17,6 @@ groups:
             VALUE = {{ $value }}
             LABELS = {{ $labels }}
 
-      - alert: RedisMissingMaster
-        expr: >-
-          (count(redis_instance_info{role="master"}) or vector(0))
-          < 1
-        for: 0m
-        labels:
-          severity: critical
-        annotations:
-          summary: Redis missing master at {{ $labels.instance }}).
-          description: |-
-            Redis cluster has no node marked as a master.
-
-            VALUE = {{ $value }}
-            LABELS = {{ $labels }}
-
-      - alert: RedisTooManyMasters
-        expr: >-
-          count(redis_instance_info{role="master"}) > 1
-        for: 0m
-        labels:
-          severity: critical
-        annotations:
-          summary: Redis too many masters at {{ $labels.instance }}.
-          description: |-
-            Redis cluster has too many nodes marked as a master.
-
-            VALUE = {{ $value }}
-            LABELS = {{ $labels }}
-
-      - alert: RedisDisconnectedSlaves
-        expr: >-
-          count without (instance, job) (redis_connected_slaves)
-          - sum without (instance, job) (redis_connected_slaves)
-          - 1
-          > 1
-        for: 0m
-        labels:
-          severity: critical
-        annotations:
-          summary: Redis disconnected slaves at {{ $labels.instance }}.
-          description: |-
-            Redis is not replicating for all slaves.
-
-            VALUE = {{ $value }}
-            LABELS = {{ $labels }}
-
-      - alert: RedisReplicationBroken
-        expr: >-
-          delta(redis_connected_slaves[1m]) < 0
-        for: 0m
-        labels:
-          severity: critical
-        annotations:
-          summary: Redis replication broken at {{ $labels.instance }}.
-          description: |-
-            Redis instance lost a slave.
-
-            VALUE = {{ $value }}
-            LABELS = {{ $labels }}
-
-      - alert: RedisClusterFlapping
-        expr: >-
-          changes(redis_connected_slaves[1m]) > 1
-        for: 2m
-        labels:
-          severity: critical
-        annotations:
-          summary: Redis cluster flapping at {{ $labels.instance }}.
-          description: |-
-            Changes have been detected in the Redis replica connection. This can occur when replica nodes lose connection to the master and reconnect (a.k.a flapping).
-
-            VALUE = {{ $value }}
-            LABELS = {{ $labels }}
-
-      - alert: RedisMissingBackup
-        expr: >-
-          time() - redis_rdb_last_save_timestamp_seconds
-          > 60 * 60 * 24
-        for: 0m
-        labels:
-          severity: critical
-        annotations:
-          summary: Redis missing backup at {{ $labels.instance }}.
-          description: |-
-            Redis has not been backed up for 24 hours.
-
-            VALUE = {{ $value }}
-            LABELS = {{ $labels }}
-
       - alert: RedisOutOfSystemMemory
         expr: >-
           redis_memory_used_bytes

Consider giving Nix/NixOS a try! <3