---
# PrometheusRule (Prometheus Operator CRD) defining alerting and recording
# rules for Kyverno. Summaries and descriptions are user-facing alert text
# and are intentionally left in their original language.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: kyverno-alerts
  namespace: kyverno
  labels:
    # Presumably matched by the ruleSelector of the Prometheus resource;
    # verify against the cluster's Prometheus CR.
    prometheus: kube-prometheus
    role: alert-rules
spec:
  groups:
    # Availability and admission-path health.
    - name: kyverno.availability
      rules:
        # Fires when a scraped target reports down, or when no `up` series
        # exists for the job at all (target missing from service discovery),
        # which a bare `up == 0` comparison cannot detect.
        - alert: KyvernoDown
          expr: |
            up{job="kyverno-svc-metrics"} == 0
            or
            absent(up{job="kyverno-svc-metrics"})
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: "Kyverno недоступен"
            description: >-
              Admission controller Kyverno не отвечает более 1 минуты.
              Проверьте поды: kubectl get pods -n kyverno

        # p95 of admission review duration, aggregated over all series,
        # above 0.5 s for 5 minutes.
        - alert: KyvernoAdmissionLatencyHigh
          expr: |
            histogram_quantile(0.95,
              sum(rate(kyverno_admission_review_duration_seconds_bucket[5m])) by (le)
            ) > 0.5
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Высокая латентность Kyverno admission (p95 > 500ms)"
            description: >-
              p95 латентность: {{ $value | humanizeDuration }}.
              Это замедляет деплойменты. Проверьте политики с apiCall в context.

        # NOTE(review): confirm that the deployed Kyverno version exposes an
        # `admission_request_type` label on kyverno_admission_requests_total;
        # if it does not, this selector matches nothing and the alert can
        # never fire.
        - alert: KyvernoAdmissionErrors
          expr: |
            rate(kyverno_admission_requests_total{
              admission_request_type="error"
            }[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Ошибки обработки запросов в Kyverno"
            description: >-
              Kyverno возвращает ошибки. Проверьте логи:
              kubectl logs -n kyverno -l app.kubernetes.io/component=admission-controller

    # Policy evaluation results.
    - name: kyverno.policy
      rules:
        # Any failed result of a policy whose name matches the listed
        # prefixes within the last 5 minutes; `for: 0m` fires on the first
        # evaluation that sees it.
        - alert: KyvernoCriticalPolicyViolation
          expr: |
            increase(kyverno_policy_results_total{
              rule_result="fail",
              policy_name=~"disallow-privileged.*|disallow-host.*|disallow-dangerous.*"
            }[5m]) > 0
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: "Нарушение критической политики безопасности: {{ $labels.policy_name }}"
            description: >-
              Политика {{ $labels.policy_name }} была нарушена в namespace {{ $labels.resource_namespace }}.
              Немедленно проверьте: kubectl get policyreports -n {{ $labels.resource_namespace }}

        # More than 50 failed results in total over the last hour, sustained
        # for 5 minutes.
        - alert: KyvernoHighViolationRate
          expr: |
            sum(increase(kyverno_policy_results_total{rule_result="fail"}[1h])) > 50
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Высокое количество нарушений политик (> 50 за час)"
            description: >-
              За последний час: {{ $value }} нарушений.
              Проверьте отчёты: kubectl get policyreports -A

    # Resource pressure on the Kyverno containers.
    - name: kyverno.performance
      rules:
        # Per-container throttled CPU time growing faster than 0.1 s per
        # second over 5 minutes, sustained for 10 minutes.
        - alert: KyvernoCPUThrottling
          expr: |
            rate(container_cpu_cfs_throttled_seconds_total{
              namespace="kyverno",
              container=~"kyverno.*"
            }[5m]) > 0.1
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "CPU throttling Kyverno — возможна деградация производительности"
            description: "Увеличьте CPU limit для Kyverno admission controller."

    # Precomputed series for dashboards.
    - name: kyverno.recording
      rules:
        # Share of "pass" results among all policy results over 5 minutes.
        - record: kyverno:compliance_rate:5m
          expr: |
            sum(rate(kyverno_policy_results_total{rule_result="pass"}[5m])) /
            sum(rate(kyverno_policy_results_total[5m]))