diff options
Diffstat (limited to 'modules/nixos/monitoring/rules/postgres.yaml')
-rw-r--r-- | modules/nixos/monitoring/rules/postgres.yaml | 310 |
1 files changed, 0 insertions, 310 deletions
---
# Prometheus alerting rules for PostgreSQL.
#
# Reconstructed from the diff that deleted
# modules/nixos/monitoring/rules/postgres.yaml (blob 6a98c92, mode 100644).
#
# Every rule attaches the same two debugging lines to its description:
# the value that triggered the alert and the full label set of the series.
# Annotations reference {{ $labels.instance }}, so any aggregation in `expr`
# has to keep the `instance` label or the rendered text comes out empty.
groups:
  - name: Postgres

    rules:
      # The exporter reports pg_up == 0 when it cannot reach the server.
      - alert: PostgresqlDown
        expr: >-
          pg_up == 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: PostgreSQL is down at {{ $labels.instance }}.
          description: |-
            PostgreSQL instance is down.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Fires while the postmaster start time is less than 60 seconds old.
      - alert: PostgresqlRestarted
        expr: >-
          time() - pg_postmaster_start_time_seconds < 60
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: PostgreSQL restarted at {{ $labels.instance }}.
          description: |-
            PostgreSQL restarted.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      - alert: PostgresqlExporterError
        expr: >-
          pg_exporter_last_scrape_error > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: PostgreSQL exporter errors at {{ $labels.instance }}.
          description: |-
            PostgreSQL exporter is showing errors.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # The `> 0` guard skips tables that have never been auto vacuumed;
      # the threshold is 10 days expressed in seconds.
      - alert: PostgresqlTableNotAutoVacuumed
        expr: >-
          (pg_stat_user_tables_last_autovacuum > 0)
          and
          (time() - pg_stat_user_tables_last_autovacuum)
          > 60 * 60 * 24 * 10
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: PostgreSQL table not auto vacuumed at {{ $labels.instance }}.
          description: |-
            Table {{ $labels.relname }} has not been auto vacuumed for 10 days.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Same shape as the auto-vacuum rule above, for auto-analyze.
      # Fix: removed a stray trailing double quote after the LABELS line that
      # was part of the block scalar and therefore of the rendered description.
      - alert: PostgresqlTableNotAutoAnalyzed
        expr: >-
          (pg_stat_user_tables_last_autoanalyze > 0)
          and
          (time() - pg_stat_user_tables_last_autoanalyze)
          > 24 * 60 * 60 * 10
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: PostgreSQL table not auto analyzed at {{ $labels.instance }}.
          description: |-
            Table {{ $labels.relname }} has not been auto analyzed for 10 days.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Fix: the previous expression grouped by `datname` only and compared the
      # result with pg_settings_max_connections, which carries no `datname`
      # label. One-to-one vector matching found no pairs, so the alert could
      # never fire. Connections are now summed per instance and matched to the
      # setting with `on (instance)`.
      - alert: PostgresqlTooManyConnections
        expr: >-
          sum by (instance) (
            pg_stat_activity_count{datname!~"template.*|postgres"}
          ) > on (instance) (pg_settings_max_connections * 0.8)
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: PostgreSQL with too many connections at {{ $labels.instance }}.
          description: |-
            PostgreSQL instance {{ $labels.instance }} has too many connections.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Fix: `instance` is kept in the grouping so the annotations can name the
      # affected server; it used to be aggregated away.
      - alert: PostgresqlNotEnoughConnections
        expr: >-
          sum by (instance, datname) (
            pg_stat_activity_count{datname!~"template.*|postgres"}
          ) < 1
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: PostgreSQL with not enough connections at {{ $labels.instance }}.
          description: |-
            PostgreSQL instance {{ $labels.instance }} should have more connections.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      - alert: PostgresqlDeadLocks
        expr: >-
          increase(
            pg_stat_database_deadlocks{datname!~"template.*|postgres"}[1m]
          ) > 5
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: PostgreSQL dead-locks at instance {{ $labels.instance }}.
          description: |-
            PostgreSQL shows dead-locks.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Share of rolled-back transactions among all finished transactions over
      # the last 3 minutes; alerts above 10 %.
      # Fix: `instance` added to the grouping so the summary is not rendered
      # with an empty instance name.
      - alert: PostgresqlHighRollbackRate
        expr: >-
          sum by (namespace, datname, instance)
          (
            (rate(pg_stat_database_xact_rollback{datname!~"template.*|postgres|dendrite",datid!="0"}[3m]))
            /
            (
              (rate(pg_stat_database_xact_rollback{datname!~"template.*|postgres|dendrite",datid!="0"}[3m]))
              +
              (rate(pg_stat_database_xact_commit{datname!~"template.*|postgres|dendrite",datid!="0"}[3m]))
            )
          )
          > 0.10
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: PostgreSQL at a high rollback rate at {{ $labels.instance }}.
          description: |-
            Ratio of transactions being aborted compared to committed is too big.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      - alert: PostgresqlCommitRateLow
        expr: >-
          rate(pg_stat_database_xact_commit[1m])
          < 10
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: PostgreSQL commit rate low at instance {{ $labels.instance }}.
          description: |-
            PostgreSQL seems to be processing very few transactions.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      - alert: PostgresqlLowXidConsumption
        expr: >-
          rate(pg_txid_current[1m])
          < 5
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: PostgreSQL low XID consumption at instance {{ $labels.instance }}.
          description: |-
            PostgreSQL seems to be consuming transaction IDs very slowly.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      - alert: PostgresqlHighRateStatementTimeout
        expr: >-
          rate(postgresql_errors_total{type="statement_timeout"}[1m])
          > 3
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: PostgreSQL high rate statement timeout (instance {{ $labels.instance }})
          description: |-
            PostgreSQL transactions showing high rate of statement timeouts.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      - alert: PostgresqlHighRateDeadlock
        expr: >-
          increase(postgresql_errors_total{type="deadlock_detected"}[1m])
          > 1
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: PostgreSQL high rate dead-lock at {{ $labels.instance }}.
          description: |-
            PostgreSQL has detected dead-locks.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      - alert: PostgresqlUnusedReplicationSlot
        expr: >-
          pg_replication_slots_active == 0
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: PostgreSQL unused replication slot at {{ $labels.instance }}.
          description: |-
            Unused replication slots.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Only tables with more than 10000 dead tuples are considered; of those,
      # alert when dead tuples make up at least 10 % of all tuples.
      - alert: PostgresqlTooManyDeadTuples
        expr: >-
          (
            (pg_stat_user_tables_n_dead_tup > 10000)
            /
            (pg_stat_user_tables_n_live_tup + pg_stat_user_tables_n_dead_tup)
          )
          >= 0.1
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: PostgreSQL too many dead tuples at {{ $labels.instance }}.
          description: |-
            PostgreSQL number of dead tuples is too large.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Fix: summed per instance instead of globally so that
      # {{ $labels.instance }} is populated in the summary.
      - alert: PostgresqlSslCompressionActive
        expr: >-
          sum by (instance) (pg_stat_ssl_compression) > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: PostgreSQL SSL compression active at {{ $labels.instance }}.
          description: |-
            Database connections with an SSL compression is enabled. This may add a significant jitter in the replication delay.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Held locks as a fraction of max_locks_per_transaction * max_connections.
      # Fix: a label-less sum(pg_locks_count) was divided by settings series
      # that still carry their target labels, so the division matched nothing
      # and the alert could never fire. Both sides are now joined on `instance`.
      - alert: PostgresqlTooManyLocksAcquired
        expr: >-
          (
            sum by (instance) (pg_locks_count)
            / on (instance)
            (pg_settings_max_locks_per_transaction * pg_settings_max_connections)
          )
          > 0.20
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: PostgreSQL too many locks acquired at {{ $labels.instance }}.
          description: |-
            Too many locks acquired on the database.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Alerts on indexes that are more than 80 % bloated and whose real size
      # exceeds 100000000 (presumably bytes - confirm against the bloat query).
      - alert: PostgresqlBloatIndexHigh
        expr: >-
          pg_bloat_btree_bloat_pct > 80
          and
          on (idxname) (pg_bloat_btree_real_size > 100000000)
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: PostgreSQL index bloat high at {{ $labels.instance }}.
          description: |-
            The index {{ $labels.idxname }} is bloated. You should execute `REINDEX INDEX CONCURRENTLY {{ $labels.idxname }};`.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Same as the index rule above, for tables, with a 200000000 size floor.
      - alert: PostgresqlBloatTableHigh
        expr: >-
          pg_bloat_table_bloat_pct > 80
          and
          on (relname) (pg_bloat_table_real_size > 200000000)
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: PostgreSQL table bloat high at instance {{ $labels.instance }}.
          description: |-
            The table {{ $labels.relname }} is bloated. You should execute `VACUUM {{ $labels.relname }};`.

            VALUE = {{ $value }}
            LABELS = {{ $labels }}