---
# PrometheusRule for the Kyverno admission controller.
# Contains alert groups for availability, policy violations and CPU
# throttling, plus one recording rule for the policy pass ratio.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: kyverno-alerts
  namespace: kyverno
  labels:
    # Presumably these labels are what the Prometheus CR's ruleSelector
    # matches on - TODO confirm against the Prometheus object in the cluster.
    prometheus: kube-prometheus
    role: alert-rules
spec:
  groups:
    - name: kyverno.availability
      rules:
        - alert: KyvernoDown
          # `up == 0` covers a failing scrape; `absent(...)` covers the case
          # where the target has vanished from service discovery entirely
          # (no `up` series exists at all, so `== 0` alone would never match).
          expr: |
            up{job="kyverno-svc-metrics"} == 0
            or
            absent(up{job="kyverno-svc-metrics"})
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: "Kyverno недоступен"
            description: >-
              Admission controller Kyverno не отвечает более 1 минуты.
              Проверьте поды: kubectl get pods -n kyverno

        - alert: KyvernoAdmissionLatencyHigh
          # p95 of the admission review duration histogram over 5m,
          # compared against 0.5 (seconds).
          expr: |
            histogram_quantile(0.95,
              sum(rate(kyverno_admission_review_duration_seconds_bucket[5m])) by (le)
            ) > 0.5
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Высокая латентность Kyverno admission (p95 > 500ms)"
            description: >-
              p95 латентность: {{ $value | humanizeDuration }}.
              Это замедляет деплойменты. Проверьте политики с apiCall в context.

        - alert: KyvernoAdmissionErrors
          # NOTE(review): confirm that `admission_request_type` is a label
          # exported on this metric by the deployed Kyverno version; if it is
          # not, the selector matches nothing and this alert can never fire.
          expr: |
            rate(kyverno_admission_requests_total{
              admission_request_type="error"
            }[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Ошибки обработки запросов в Kyverno"
            description: >-
              Kyverno возвращает ошибки.
              Проверьте логи: kubectl logs -n kyverno -l app.kubernetes.io/component=admission-controller

    - name: kyverno.policy
      rules:
        - alert: KyvernoCriticalPolicyViolation
          # Fires immediately (for: 0m) on any failed result of a policy whose
          # name matches one of the three `disallow-*` prefixes below.
          expr: |
            increase(kyverno_policy_results_total{
              rule_result="fail",
              policy_name=~"disallow-privileged.*|disallow-host.*|disallow-dangerous.*"
            }[5m]) > 0
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: "Нарушение критической политики безопасности: {{ $labels.policy_name }}"
            description: >-
              Политика {{ $labels.policy_name }} была нарушена в namespace {{ $labels.resource_namespace }}.
              Немедленно проверьте: kubectl get policyreports -n {{ $labels.resource_namespace }}

        - alert: KyvernoHighViolationRate
          # Total failed policy results across all series in the last hour.
          expr: |
            sum(increase(kyverno_policy_results_total{rule_result="fail"}[1h])) > 50
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Высокое количество нарушений политик (> 50 за час)"
            description: >-
              За последний час: {{ $value }} нарушений.
              Проверьте отчёты: kubectl get policyreports -A

    - name: kyverno.performance
      rules:
        - alert: KyvernoCPUThrottling
          # Throttled CPU seconds per second for containers in the kyverno
          # namespace whose name starts with "kyverno".
          expr: |
            rate(container_cpu_cfs_throttled_seconds_total{
              namespace="kyverno",
              container=~"kyverno.*"
            }[5m]) > 0.1
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "CPU throttling Kyverno — возможна деградация производительности"
            description: "Увеличьте CPU limit для Kyverno admission controller."

    - name: kyverno.recording
      rules:
        - record: kyverno:compliance_rate:5m
          # Ratio of "pass" results to all results over 5m. The denominator is
          # filtered with `> 0` so that idle periods produce no sample instead
          # of a NaN from 0 / 0.
          expr: |
            sum(rate(kyverno_policy_results_total{rule_result="pass"}[5m]))
            /
            (sum(rate(kyverno_policy_results_total[5m])) > 0)