---
# PrometheusRule (Prometheus Operator CRD) that bundles the alerting and
# recording rules for Kyverno. Rules are split into four groups:
# availability, policy results, performance and recording.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: kyverno-alerts
  namespace: kyverno
  labels:
    # NOTE(review): presumably these labels have to match the ruleSelector
    # of the Prometheus resource that should load this object - confirm.
    prometheus: kube-prometheus
    role: alert-rules
spec:
  groups:
    # ---------------------------------------------------------------------
    # Availability of the Kyverno admission path.
    # ---------------------------------------------------------------------
    - name: kyverno.availability
      rules:
        # Fires when a scrape target of the job reports down, OR when no
        # `up` series exists for the job at all. A bare `up == 0` returns
        # an empty result once the target disappears from service
        # discovery, so the alert would stay silent in exactly that outage;
        # `absent()` covers the gap.
        - alert: KyvernoDown
          expr: >
            up{job="kyverno-svc-metrics"} == 0
            or
            absent(up{job="kyverno-svc-metrics"})
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: "Kyverno недоступен"
            description: >-
              Admission controller Kyverno не отвечает более 1 минуты.
              Проверьте поды: kubectl get pods -n kyverno

        # p95 of the admission review duration, aggregated over all series
        # (only `le` is kept), must stay at or below 0.5 s; the condition
        # has to hold for 5 minutes before the alert fires.
        - alert: KyvernoAdmissionLatencyHigh
          expr: >
            histogram_quantile(0.95,
              sum(rate(kyverno_admission_review_duration_seconds_bucket[5m])) by (le)
            ) > 0.5
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Высокая латентность Kyverno admission (p95 > 500ms)"
            description: >-
              p95 латентность: {{ $value | humanizeDuration }}.
              Это замедляет деплойменты. Проверьте политики с apiCall в context.

        # Any non-zero per-series rate of matching requests for 2 minutes.
        # NOTE(review): confirm that kyverno_admission_requests_total really
        # exposes an `admission_request_type` label with the value "error"
        # in the deployed Kyverno version; if it does not, this selector
        # matches nothing and the alert can never fire.
        - alert: KyvernoAdmissionErrors
          expr: >
            rate(kyverno_admission_requests_total{
              admission_request_type="error"
            }[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Ошибки обработки запросов в Kyverno"
            description: >-
              Kyverno возвращает ошибки. Проверьте логи:
              kubectl logs -n kyverno -l app.kubernetes.io/component=admission-controller

    # ---------------------------------------------------------------------
    # Policy evaluation results.
    # ---------------------------------------------------------------------
    - name: kyverno.policy
      rules:
        # Any failed result within the last 5 minutes for policies whose
        # name matches one of the listed prefixes. `for: 0m` means there is
        # no pending period: the alert fires on the first evaluation that
        # matches.
        - alert: KyvernoCriticalPolicyViolation
          expr: >
            increase(kyverno_policy_results_total{
              rule_result="fail",
              policy_name=~"disallow-privileged.*|disallow-host.*|disallow-dangerous.*"
            }[5m]) > 0
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: "Нарушение критической политики безопасности: {{ $labels.policy_name }}"
            description: >-
              Политика {{ $labels.policy_name }} была нарушена
              в namespace {{ $labels.resource_namespace }}.
              Немедленно проверьте:
              kubectl get policyreports -n {{ $labels.resource_namespace }}

        # More than 50 failed results in total (summed over all series)
        # during the last hour, sustained for 5 minutes.
        - alert: KyvernoHighViolationRate
          expr: >
            sum(increase(kyverno_policy_results_total{rule_result="fail"}[1h])) > 50
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Высокое количество нарушений политик (> 50 за час)"
            description: >-
              За последний час: {{ $value }} нарушений.
              Проверьте отчёты: kubectl get policyreports -A

    # ---------------------------------------------------------------------
    # Resource pressure on the Kyverno containers.
    # ---------------------------------------------------------------------
    - name: kyverno.performance
      rules:
        # A matching container series accrues more than 0.1 s of
        # CFS-throttled time per second (5m rate) for 10 minutes.
        - alert: KyvernoCPUThrottling
          expr: >
            rate(container_cpu_cfs_throttled_seconds_total{
              namespace="kyverno",
              container=~"kyverno.*"
            }[5m]) > 0.1
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "CPU throttling Kyverno — возможна деградация производительности"
            description: "Увеличьте CPU limit для Kyverno admission controller."

    # ---------------------------------------------------------------------
    # Precomputed series.
    # ---------------------------------------------------------------------
    - name: kyverno.recording
      rules:
        # Ratio of the 5m rate of "pass" results to the 5m rate of all
        # policy results, aggregated over every series.
        - record: kyverno:compliance_rate:5m
          expr: >
            sum(rate(kyverno_policy_results_total{rule_result="pass"}[5m]))
            /
            sum(rate(kyverno_policy_results_total[5m]))