add mon
This commit is contained in:
41
4.monitoring/alerts.yaml
Normal file
41
4.monitoring/alerts.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
---
# PrometheusRule defining the alerting rules for the PostgreSQL clusters
# (exporter availability, instance availability, replication lag).
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # NOTE(review): these labels are presumably what the Prometheus CR's
  # ruleSelector matches on - confirm against the Prometheus resource.
  labels:
    prometheus: zalando
    role: alert-rules
  name: prometheus-zalando-rules
  namespace: zalando
spec:
  groups:
    - name: zalando.rules
      rules:
        # Fires (warning) when the scrape target of job "zalando-monitoring"
        # has reported up == 0 for 2 minutes.
        # NOTE(review): verify that a scrape job is really labelled
        # job="zalando-monitoring"; prometheus-operator names PodMonitor jobs
        # "<namespace>/<name>" unless jobLabel is set, in which case this
        # expression would never match.
        - alert: PG exporter
          annotations:
            description: >-
              Failed to scrape {{ $labels.job }} on {{ $labels.namespace }} for
              more than 2 minutes.
            title: PG exporter is down
          expr: up{job="zalando-monitoring"} == 0
          for: 2m
          labels:
            severity: warning
        # Fires (critical) when pg_up has been 0 for 3 minutes.
        - alert: PG InstanceDown
          annotations:
            description: Failed to scrape {{$labels.namespace}} for more than 3 minutes.
            title: PG Instance is down
          expr: pg_up == 0
          for: 3m
          labels:
            severity: critical
        # Fires (critical) when the replication-slot WAL LSN difference stays
        # above 1024 for 3 minutes.
        # NOTE(review): the threshold is presumably in bytes, which would make
        # it very low (1 KiB) - confirm the unit and the intended value.
        - alert: PG Lag
          annotations:
            description: >-
              Failed replication on replica {{$labels.namespace}} for more than
              3 minutes.
            title: PG Replication lag
          expr: pg_replication_slots_pg_wal_lsn_diff > 1024
          for: 3m
          labels:
            severity: critical
|
19
4.monitoring/podMonitor.yaml
Normal file
19
4.monitoring/podMonitor.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
---
# PodMonitor that scrapes the metrics endpoint exposed on the "exporter"
# port of pods labelled application=spilo.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: zalando-pg-exporter
  namespace: rm-pgsql
  labels:
    app: psql-pg-exporter
spec:
  # No namespaceSelector is set, so only pods in this PodMonitor's own
  # namespace are considered (prometheus-operator default).
  selector:
    matchLabels:
      application: spilo
  podMetricsEndpoints:
    - port: exporter
      scheme: http
      path: /metrics
      relabelings:
        # Sets the target label "cluster" to a fixed value.
        - action: replace
          targetLabel: cluster
          replacement: rke-first-cluster
|
Reference in New Issue
Block a user