---
# PrometheusRule declaring the "zalando.rules" alert group in the "zalando"
# namespace. The group contains three PostgreSQL-related alerts: exporter
# scrape failure, instance down, and replication lag.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # NOTE(review): metadata.generation is normally maintained by the API
  # server rather than authored by hand; confirm whether it should stay here.
  generation: 2
  labels:
    prometheus: zalando
    role: alert-rules
  name: prometheus-zalando-rules
  namespace: zalando
spec:
  groups:
    - name: zalando.rules
      rules:
        # Fires when the "zalando-monitoring" scrape job has reported
        # up == 0 continuously for 2 minutes.
        - alert: PG exporter
          annotations:
            description: >-
              Failed to scrape {{ $labels.job }} on {{ $labels.namespace }}
              for more than 2 minutes.
            title: PG exporter is down
          expr: 'up{job="zalando-monitoring"} == 0'
          for: 2m
          labels:
            severity: warning

        # Fires when pg_up has been 0 continuously for 3 minutes.
        - alert: PG InstanceDown
          annotations:
            description: >-
              Failed to scrape {{$labels.namespace}} for more than 3 minutes.
            title: PG Instance is down
          expr: 'pg_up == 0'
          for: 3m
          labels:
            severity: critical

        # Fires when pg_replication_slots_pg_wal_lsn_diff has stayed above
        # 1024 for 3 minutes.
        # NOTE(review): the unit of the 1024 threshold is presumably bytes of
        # WAL; confirm against the exporter's metric definition.
        - alert: PG Lag
          annotations:
            description: >-
              Failed replication on replica {{$labels.namespace}} for more
              than 3 minutes.
            title: PG Replication lag
          expr: 'pg_replication_slots_pg_wal_lsn_diff > 1024'
          for: 3m
          labels:
            severity: critical