# Files
# zl-base-in-kube/4.monitoring/alerts.yaml
# 2024-08-02 18:12:53 +07:00
#
# 41 lines
# 1.2 KiB
# YAML

---
# PrometheusRule (monitoring.coreos.com/v1) declaring one rule group,
# "zalando.rules", with three PostgreSQL-related alerts in the "zalando"
# namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # NOTE(review): presumably these labels are what the Prometheus resource's
  # ruleSelector matches on -- confirm against the Prometheus CR.
  labels:
    prometheus: zalando
    role: alert-rules
  name: prometheus-zalando-rules
  namespace: zalando
spec:
  groups:
    - name: zalando.rules
      rules:
        # Scrape target for job "zalando-monitoring" has reported up == 0
        # continuously for 2 minutes.
        # NOTE(review): the alert names below contain spaces; they are kept
        # as-is because routing/silences may match on them -- confirm before
        # renaming.
        - alert: PG exporter
          annotations:
            description: >-
              Failed to scrape {{ $labels.job }} on {{ $labels.namespace }} for
              more than 2 minutes.
            title: PG exporter is down
          expr: up{job="zalando-monitoring"} == 0
          for: 2m
          labels:
            severity: warning

        # The pg_up metric has been 0 continuously for 3 minutes.
        # NOTE(review): pg_up is presumably emitted by the PostgreSQL
        # exporter when it cannot reach the database -- confirm.
        - alert: PG InstanceDown
          annotations:
            # Quoted so the template braces are always read as a plain string.
            description: 'Failed to scrape {{$labels.namespace}} for more than 3 minutes.'
            title: PG Instance is down
          expr: pg_up == 0
          for: 3m
          labels:
            severity: critical

        # pg_replication_slots_pg_wal_lsn_diff has stayed above 1024
        # continuously for 3 minutes.
        # NOTE(review): the threshold unit is presumably bytes of WAL --
        # confirm against the exporter query that produces this metric.
        - alert: PG Lag
          annotations:
            description: >-
              Failed replication on replica {{$labels.namespace}} for more than
              3 minutes.
            title: PG Replication lag
          expr: pg_replication_slots_pg_wal_lsn_diff > 1024
          for: 3m
          labels:
            severity: critical