init
This commit is contained in:
134
06-monitoring/01-logging/README.md
Normal file
134
06-monitoring/01-logging/README.md
Normal file
@@ -0,0 +1,134 @@
|
||||
# Урок 6.1 — Логирование и мониторинг Kyverno
|
||||
|
||||
## Файлы
|
||||
|
||||
| Файл | Описание |
|
||||
|------|----------|
|
||||
| `grafana-dashboard.json` | Готовый Grafana dashboard |
|
||||
| `../03-reporting/prometheus-alert-rules.yaml` | Alerting правила |
|
||||
| `../03-reporting/service-monitor.yaml` | ServiceMonitor |
|
||||
|
||||
## Настройка уровня логирования
|
||||
|
||||
```bash
|
||||
# Изменить уровень логирования через Helm upgrade (изменение аргументов контейнера приводит к перезапуску подов)
|
||||
helm upgrade kyverno kyverno/kyverno \
|
||||
--namespace kyverno \
|
||||
--reuse-values \
|
||||
--set features.logging.verbosity=4 --set features.logging.format=json
|
||||
|
||||
# Уровни:
|
||||
# --v=1 production (warnings + important events)
|
||||
# --v=2 informational (policy apply/deny events)
|
||||
# --v=4 debug (full AdmissionReview)
|
||||
# --v=6 trace (очень детально, только для отладки)
|
||||
```
|
||||
|
||||
## Просмотр логов
|
||||
|
||||
```bash
|
||||
# Логи admission controller
|
||||
kubectl logs -n kyverno \
|
||||
-l app.kubernetes.io/component=admission-controller \
|
||||
--tail=100 -f
|
||||
|
||||
# Логи background controller (для generate политик)
|
||||
kubectl logs -n kyverno \
|
||||
-l app.kubernetes.io/component=background-controller \
|
||||
--tail=50
|
||||
|
||||
# Фильтрация — только нарушения
|
||||
kubectl logs -n kyverno \
|
||||
-l app.kubernetes.io/component=admission-controller \
|
||||
--tail=200 | grep '"result":"fail"'
|
||||
|
||||
# Фильтрация — конкретная политика
|
||||
kubectl logs -n kyverno \
|
||||
-l app.kubernetes.io/component=admission-controller \
|
||||
--tail=200 | grep '"policy":"require-resource-limits"'
|
||||
```
|
||||
|
||||
## Метрики через port-forward
|
||||
|
||||
```bash
|
||||
# Открыть порт metrics
|
||||
kubectl port-forward -n kyverno \
|
||||
svc/kyverno-svc-metrics 8000:8000 &
|
||||
|
||||
# Все метрики
|
||||
curl -s http://localhost:8000/metrics | grep kyverno_ | head -40
|
||||
|
||||
# Конкретная метрика
|
||||
curl -s http://localhost:8000/metrics | \
|
||||
grep kyverno_policy_results_total
|
||||
|
||||
# Только серии с результатом fail (нарушения политик)
|
||||
curl -s http://localhost:8000/metrics | \
|
||||
grep 'kyverno_policy_results_total.*rule_result="fail"'
|
||||
|
||||
# Остановить port-forward
|
||||
kill %1
|
||||
```
|
||||
|
||||
## Импорт Grafana Dashboard
|
||||
|
||||
```bash
|
||||
# Способ 1: через UI
|
||||
# Grafana → Dashboards → Import → Upload JSON file → grafana-dashboard.json
|
||||
|
||||
# Способ 2: через ConfigMap (если используете Grafana dashboard sidecar, например в kube-prometheus-stack)
|
||||
kubectl create configmap kyverno-dashboard \
|
||||
--from-file=grafana-dashboard.json \
|
||||
--namespace=monitoring
|
||||
|
||||
kubectl label configmap kyverno-dashboard \
|
||||
-n monitoring \
|
||||
grafana_dashboard=1
|
||||
```
|
||||
|
||||
## Ключевые PromQL запросы
|
||||
|
||||
```promql
|
||||
# Compliance rate (цель: 100%)
|
||||
sum(rate(kyverno_policy_results_total{rule_result="pass"}[5m])) /
|
||||
sum(rate(kyverno_policy_results_total[5m])) * 100
|
||||
|
||||
# p95 латентность в миллисекундах
|
||||
histogram_quantile(0.95,
|
||||
sum(rate(kyverno_admission_review_duration_seconds_bucket[5m])) by (le)
|
||||
) * 1000
|
||||
|
||||
# Нарушений за сутки по политикам
|
||||
topk(10, sum by(policy_name)(
|
||||
increase(kyverno_policy_results_total{rule_result="fail"}[24h])
|
||||
))
|
||||
|
||||
# Нарушений за сутки по namespace
|
||||
sum by(resource_namespace)(
|
||||
increase(kyverno_policy_results_total{rule_result="fail"}[24h])
|
||||
)
|
||||
|
||||
# Процент ошибок обработки
|
||||
sum(rate(kyverno_admission_requests_total{admission_request_type="error"}[5m])) /
|
||||
sum(rate(kyverno_admission_requests_total[5m])) * 100
|
||||
|
||||
# CPU throttling Kyverno
|
||||
rate(container_cpu_cfs_throttled_seconds_total{
|
||||
namespace="kyverno",
|
||||
container=~"kyverno.*"
|
||||
}[5m])
|
||||
```
|
||||
|
||||
## События Kubernetes
|
||||
|
||||
```bash
|
||||
# PolicyViolation события
|
||||
kubectl get events -A \
|
||||
--field-selector reason=PolicyViolation \
|
||||
--sort-by='.lastTimestamp'
|
||||
|
||||
# События от Kyverno
|
||||
kubectl get events -A \
|
||||
--field-selector source=kyverno-admission \
|
||||
--sort-by='.lastTimestamp' | tail -20
|
||||
```
|
||||
130
06-monitoring/01-logging/grafana-dashboard.json
Normal file
130
06-monitoring/01-logging/grafana-dashboard.json
Normal file
@@ -0,0 +1,130 @@
|
||||
{
|
||||
"title": "Kyverno Policy Dashboard",
|
||||
"uid": "kyverno-main",
|
||||
"tags": ["kyverno", "policy", "security"],
|
||||
"timezone": "browser",
|
||||
"refresh": "30s",
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "Policy Compliance Rate",
|
||||
"type": "stat",
|
||||
"gridPos": {"h": 4, "w": 6, "x": 0, "y": 0},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(kyverno_policy_results_total{rule_result=\"pass\"}[5m])) / sum(rate(kyverno_policy_results_total[5m])) * 100",
|
||||
"legendFormat": "Compliance %"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"reduceOptions": {"calcs": ["lastNotNull"]},
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{"color": "red", "value": 0},
|
||||
{"color": "yellow", "value": 90},
|
||||
{"color": "green", "value": 99}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "Admission Requests/s",
|
||||
"type": "stat",
|
||||
"gridPos": {"h": 4, "w": 6, "x": 6, "y": 0},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(kyverno_admission_requests_total[5m]))",
|
||||
"legendFormat": "Requests/s"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "Admission Latency p95 (ms)",
|
||||
"type": "stat",
|
||||
"gridPos": {"h": 4, "w": 6, "x": 12, "y": 0},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum(rate(kyverno_admission_review_duration_seconds_bucket[5m])) by (le)) * 1000",
|
||||
"legendFormat": "p95 latency"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{"color": "green", "value": 0},
|
||||
{"color": "yellow", "value": 100},
|
||||
{"color": "red", "value": 500}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"title": "Total Violations (24h)",
|
||||
"type": "stat",
|
||||
"gridPos": {"h": 4, "w": 6, "x": 18, "y": 0},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(kyverno_policy_results_total{rule_result=\"fail\"}[24h]))",
|
||||
"legendFormat": "Violations"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{"color": "green", "value": 0},
|
||||
{"color": "yellow", "value": 10},
|
||||
{"color": "red", "value": 100}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"title": "Top Violated Policies (1h)",
|
||||
"type": "bargauge",
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 4},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, sum by(policy_name)(increase(kyverno_policy_results_total{rule_result=\"fail\"}[1h])))",
|
||||
"legendFormat": "{{policy_name}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"title": "Violations by Namespace (24h)",
|
||||
"type": "table",
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 4},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(sum by(resource_namespace)(increase(kyverno_policy_results_total{rule_result=\"fail\"}[24h])))",
|
||||
"legendFormat": "{{resource_namespace}}",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"title": "Admission Latency over time",
|
||||
"type": "timeseries",
|
||||
"gridPos": {"h": 8, "w": 24, "x": 0, "y": 12},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(kyverno_admission_review_duration_seconds_bucket[5m])) by (le)) * 1000",
|
||||
"legendFormat": "p50"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum(rate(kyverno_admission_review_duration_seconds_bucket[5m])) by (le)) * 1000",
|
||||
"legendFormat": "p95"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(kyverno_admission_review_duration_seconds_bucket[5m])) by (le)) * 1000",
|
||||
"legendFormat": "p99"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user