summary refs log tree commit diff
path: root/modules/nixos/monitoring/rules/postgres.yaml
diff options
context:
space:
mode:
author Azat Bahawi <azat@bahawi.net> 2024-04-21 02:15:42 +0300
committer Azat Bahawi <azat@bahawi.net> 2024-04-21 02:15:42 +0300
commit e6ed60548397627bf10f561f9438201dbba0a36e (patch)
tree f9a84c5957d2cc4fcd148065ee9365a0c851ae1c /modules/nixos/monitoring/rules/postgres.yaml
parent 9ac64328603d44bd272175942d3ea3eaadcabd04 (diff)
2024-04-21
Diffstat (limited to 'modules/nixos/monitoring/rules/postgres.yaml')
-rw-r--r-- modules/nixos/monitoring/rules/postgres.yaml 310
1 files changed, 0 insertions, 310 deletions
diff --git a/modules/nixos/monitoring/rules/postgres.yaml b/modules/nixos/monitoring/rules/postgres.yaml
deleted file mode 100644
index 6a98c92..0000000
--- a/modules/nixos/monitoring/rules/postgres.yaml
+++ /dev/null
@@ -1,310 +0,0 @@
----
-groups:
- - name: Postgres
-
- rules:
- - alert: PostgresqlDown  # matches every pg_up series that reads 0
- expr: >-  # presumably pg_up is the exporter's connectivity gauge — confirm
- pg_up == 0
- for: 0m  # no hold time: fires on the first evaluation that matches
- labels:
- severity: critical
- annotations:
- summary: PostgreSQL is down at {{ $labels.instance }}.
- description: |-
- PostgreSQL instance is down.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlRestarted  # postmaster start time is less than 60 s in the past
- expr: >-  # subtraction binds tighter than "<", so this compares the age in seconds with 60
- time() - pg_postmaster_start_time_seconds < 60
- for: 0m  # no hold time; the expression is only true during the first 60 s after a start
- labels:
- severity: critical
- annotations:
- summary: PostgreSQL restarted at {{ $labels.instance }}.
- description: |-
- PostgreSQL restarted.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlExporterError  # pg_exporter_last_scrape_error reads above 0
- expr: >-
- pg_exporter_last_scrape_error > 0
- for: 0m  # no hold time: fires on the first evaluation that matches
- labels:
- severity: critical
- annotations:
- summary: PostgreSQL exporter errors at {{ $labels.instance }}.
- description: |-
- PostgreSQL exporter is showing errors.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlTableNotAutoVacuumed  # last autovacuum timestamp is more than 10 days old; 0 timestamps are skipped
- expr: >-  # "and" keeps the left operand, so $value is the timestamp itself, not the age
- (pg_stat_user_tables_last_autovacuum > 0)
- and
- (time() - pg_stat_user_tables_last_autovacuum)
- > 60 * 60 * 24 * 10
- for: 0m  # no hold time: fires on the first evaluation that matches
- labels:
- severity: warning
- annotations:
- summary: PostgreSQL table not auto vacuumed at {{ $labels.instance }}.
- description: |-
- Table {{ $labels.relname }} has not been auto vacuumed for 10 days.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlTableNotAutoAnalyzed  # last autoanalyze timestamp is more than 10 days old; 0 timestamps are skipped
- expr: >-  # "and" keeps the left operand, so $value is the timestamp itself, not the age
- (pg_stat_user_tables_last_autoanalyze > 0)
- and
- (time() - pg_stat_user_tables_last_autoanalyze)
- > 24 * 60 * 60 * 10
- for: 0m  # no hold time: fires on the first evaluation that matches
- labels:
- severity: warning
- annotations:
- summary: PostgreSQL table not auto analyzed at {{ $labels.instance }}.
- description: |-
- Table {{ $labels.relname }} has not been auto analyzed for 10 days.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlTooManyConnections  # connections per instance exceed 80% of max_connections
- expr: >-  # both sides are reduced to the instance label so the comparison can actually match
- sum by (instance) (
- pg_stat_activity_count{datname!~"template.*|postgres"}
- ) > min by (instance) (pg_settings_max_connections) * 0.8
- for: 2m  # must hold for 2 minutes before firing
- labels:
- severity: warning
- annotations:
- summary: PostgreSQL with too many connections at {{ $labels.instance }}.
- description: |-
- PostgreSQL instance {{ $labels.instance }} has too many connections.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlNotEnoughConnections  # a database has fewer than 1 connection in total
- expr: >-  # instance stays in "by" so hosts are not summed together and the annotations can render it
- sum by (instance, datname) (
- pg_stat_activity_count{datname!~"template.*|postgres"}
- ) < 1
- for: 2m  # must hold for 2 minutes before firing
- labels:
- severity: warning
- annotations:
- summary: PostgreSQL with not enough connections at {{ $labels.instance }}.
- description: |-
- PostgreSQL instance {{ $labels.instance }} should have more connections.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlDeadLocks  # deadlock counter of a series grew by more than 5 within 1 minute
- expr: >-  # the datname matcher excludes template* and postgres databases
- increase(
- pg_stat_database_deadlocks{datname!~"template.*|postgres"}[1m]
- ) > 5
- for: 0m  # no hold time: fires on the first evaluation that matches
- labels:
- severity: warning
- annotations:
- summary: PostgreSQL dead-locks at instance {{ $labels.instance }}.
- description: |-
- PostgreSQL shows dead-locks.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlHighRollbackRate  # rollbacks exceed 10% of all transactions (3m rates)
- expr: >-  # instance stays in "by" so ratios are not added across hosts and the summary can render it
- sum by (namespace, instance, datname)
- (
- (rate(pg_stat_database_xact_rollback{datname!~"template.*|postgres|dendrite",datid!="0"}[3m]))
- /
- (
- (rate(pg_stat_database_xact_rollback{datname!~"template.*|postgres|dendrite",datid!="0"}[3m]))
- +
- (rate(pg_stat_database_xact_commit{datname!~"template.*|postgres|dendrite",datid!="0"}[3m]))
- )
- )
- > 0.10
- for: 0m  # no hold time: fires on the first evaluation that matches
- labels:
- severity: warning
- annotations:
- summary: PostgreSQL at a high rollback rate at {{ $labels.instance }}.
- description: |-
- Ratio of transactions being aborted compared to committed is too big.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlCommitRateLow  # per-second commit rate over 1 minute is below 10
- expr: >-  # NOTE(review): no datname matcher here, unlike other rules in this file — idle databases may match; confirm
- rate(pg_stat_database_xact_commit[1m])
- < 10
- for: 2m  # must hold for 2 minutes before firing
- labels:
- severity: critical
- annotations:
- summary: PostgreSQL commit rate low at instance {{ $labels.instance }}.
- description: |-
- PostgreSQL seems to be processing very few transactions.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlLowXidConsumption  # per-second growth of pg_txid_current over 1 minute is below 5
- expr: >-
- rate(pg_txid_current[1m])
- < 5
- for: 2m  # must hold for 2 minutes before firing
- labels:
- severity: warning
- annotations:
- summary: PostgreSQL low XID consumption at instance {{ $labels.instance }}.
- description: |-
- PostgreSQL seems to be consuming transaction IDs very slowly.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlHighRateStatementTimeout  # more than 3 statement_timeout errors per second (1m rate)
- expr: >-  # NOTE(review): postgresql_errors_total is not a pg_* metric like the others — confirm its source is scraped
- rate(postgresql_errors_total{type="statement_timeout"}[1m])
- > 3
- for: 0m  # no hold time: fires on the first evaluation that matches
- labels:
- severity: critical
- annotations:
- summary: PostgreSQL high rate statement timeout (instance {{ $labels.instance }})
- description: |-
- PostgreSQL transactions showing high rate of statement timeouts.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlHighRateDeadlock  # deadlock_detected error counter grew by more than 1 within 1 minute
- expr: >-  # NOTE(review): postgresql_errors_total is not a pg_* metric like the others — confirm its source is scraped
- increase(postgresql_errors_total{type="deadlock_detected"}[1m])
- > 1
- for: 0m  # no hold time: fires on the first evaluation that matches
- labels:
- severity: critical
- annotations:
- summary: PostgreSQL high rate dead-lock at {{ $labels.instance }}.
- description: |-
- PostgreSQL has detected dead-locks.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlUnusedReplicationSlot  # matches every pg_replication_slots_active series that reads 0
- expr: >-
- pg_replication_slots_active == 0
- for: 1m  # must hold for 1 minute before firing
- labels:
- severity: warning
- annotations:
- summary: PostgreSQL unused replication slot at {{ $labels.instance }}.
- description: |-
- Unused replication slots.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlTooManyDeadTuples  # dead tuples make up at least 10% of live + dead tuples
- expr: >-  # "> 10000" filters first, so series with 10000 or fewer dead tuples never match
- (
- (pg_stat_user_tables_n_dead_tup > 10000)
- /
- (pg_stat_user_tables_n_live_tup + pg_stat_user_tables_n_dead_tup)
- )
- >= 0.1
- for: 2m  # must hold for 2 minutes before firing
- labels:
- severity: warning
- annotations:
- summary: PostgreSQL too many dead tuples at {{ $labels.instance }}.
- description: |-
- PostgreSQL number of dead tuples is too large.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlSslCompressionActive  # pg_stat_ssl_compression sums to more than 0 on an instance
- expr: >-  # aggregated per instance so the label used by the summary survives the sum
- sum by (instance) (pg_stat_ssl_compression) > 0
- for: 0m  # no hold time: fires on the first evaluation that matches
- labels:
- severity: critical
- annotations:
- summary: PostgreSQL SSL compression active at {{ $labels.instance }}.
- description: |-
- Database connections with an SSL compression is enabled. This may add a significant jitter in the replication delay.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlTooManyLocksAcquired  # held locks exceed 20% of max_locks_per_transaction * max_connections
- expr: >-  # both operands are reduced to the instance label; a label-less sum() would match nothing
- (
- (sum by (instance) (pg_locks_count))
- /
- min by (instance) (pg_settings_max_locks_per_transaction * pg_settings_max_connections)
- )
- > 0.20
- for: 2m  # must hold for 2 minutes before firing
- labels:
- severity: critical
- annotations:
- summary: PostgreSQL too many locks acquired at {{ $labels.instance }}.
- description: |-
- Too many locks acquired on the database.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlBloatIndexHigh  # bloat above 80 on an index whose real size exceeds 100000000 (presumably bytes — confirm)
- expr: >-  # NOTE(review): "and" matches on idxname only — equal index names elsewhere could cross-match; confirm
- pg_bloat_btree_bloat_pct > 80
- and
- on (idxname) (pg_bloat_btree_real_size > 100000000)
- for: 1h  # must hold for 1 hour before firing
- labels:
- severity: warning
- annotations:
- summary: PostgreSQL index bloat high at {{ $labels.instance }}.
- description: |-
- The index {{ $labels.idxname }} is bloated. You should execute `REINDEX INDEX CONCURRENTLY {{ $labels.idxname }};`.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}
-
- - alert: PostgresqlBloatTableHigh  # bloat above 80 on a table whose real size exceeds 200000000 (presumably bytes — confirm)
- expr: >-  # NOTE(review): "and" matches on relname only — equal table names elsewhere could cross-match; confirm
- pg_bloat_table_bloat_pct > 80
- and
- on (relname) (pg_bloat_table_real_size > 200000000)
- for: 1h  # must hold for 1 hour before firing
- labels:
- severity: warning
- annotations:
- summary: PostgreSQL table bloat high at instance {{ $labels.instance }}.
- description: |-
- The table {{ $labels.relname }} is bloated. You should execute `VACUUM {{ $labels.relname }};`.
-
- VALUE = {{ $value }}
- LABELS = {{ $labels }}