From 40d9875388ffd373f2630dff87e1d897c5ee665b Mon Sep 17 00:00:00 2001
From: nyyu
Date: Sun, 24 Apr 2022 10:32:45 +0200
Subject: [PATCH] fix(prometheus): tune alert probe duration

---
Reviewer note (text in this area is not part of the commit message):
only the comparison thresholds and the matching description texts change.
  BlackboxSlowProbe      avg probe_duration_seconds       > 5  ->  > 2
  BlackboxProbeSlowHttp  avg probe_http_duration_seconds  > 1  ->  > 2
  BlackboxProbeSlowPing  avg probe_icmp_duration_seconds  > 1  ->  > 2

 conf/prometheus/alerts/blackbox.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/conf/prometheus/alerts/blackbox.yml b/conf/prometheus/alerts/blackbox.yml
index 28aa8ea..1206d85 100644
--- a/conf/prometheus/alerts/blackbox.yml
+++ b/conf/prometheus/alerts/blackbox.yml
@@ -11,13 +11,13 @@ groups:
         description: "Probe failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
 
   - alert: BlackboxSlowProbe
-    expr: avg_over_time(probe_duration_seconds[1m]) > 5
+    expr: avg_over_time(probe_duration_seconds[1m]) > 2
     for: 1m
     labels:
       severity: warning
     annotations:
       summary: Blackbox slow probe (instance {{ $labels.instance }})
-      description: "Blackbox probe took more than 5s to complete\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+      description: "Blackbox probe took more than 2s to complete\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
 
   - alert: BlackboxProbeHttpFailure
     expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
@@ -56,19 +56,19 @@ groups:
         description: "SSL certificate has expired already\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
 
   - alert: BlackboxProbeSlowHttp
-    expr: avg_over_time(probe_http_duration_seconds[1m]) > 1
+    expr: avg_over_time(probe_http_duration_seconds[1m]) > 2
     for: 1m
     labels:
       severity: warning
     annotations:
       summary: Blackbox probe slow HTTP (instance {{ $labels.instance }})
-      description: "HTTP request took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+      description: "HTTP request took more than 2s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
 
   - alert: BlackboxProbeSlowPing
-    expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1
+    expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 2
     for: 1m
     labels:
       severity: warning
     annotations:
       summary: Blackbox probe slow ping (instance {{ $labels.instance }})
-      description: "Blackbox ping took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+      description: "Blackbox ping took more than 2s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"