fix(prometheus): tune alert probe duration
This commit is contained in:
parent
aa7085082a
commit
40d9875388
@@ -11,13 +11,13 @@ groups:
|
|||||||
description: "Probe failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
description: "Probe failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
- alert: BlackboxSlowProbe
|
- alert: BlackboxSlowProbe
|
||||||
expr: avg_over_time(probe_duration_seconds[1m]) > 5
|
expr: avg_over_time(probe_duration_seconds[1m]) > 2
|
||||||
for: 1m
|
for: 1m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
summary: Blackbox slow probe (instance {{ $labels.instance }})
|
summary: Blackbox slow probe (instance {{ $labels.instance }})
|
||||||
description: "Blackbox probe took more than 5s to complete\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
description: "Blackbox probe took more than 2s to complete\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
- alert: BlackboxProbeHttpFailure
|
- alert: BlackboxProbeHttpFailure
|
||||||
expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
|
expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
|
||||||
@@ -56,19 +56,19 @@ groups:
|
|||||||
description: "SSL certificate has expired already\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
description: "SSL certificate has expired already\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
- alert: BlackboxProbeSlowHttp
|
- alert: BlackboxProbeSlowHttp
|
||||||
expr: avg_over_time(probe_http_duration_seconds[1m]) > 1
|
expr: avg_over_time(probe_http_duration_seconds[1m]) > 2
|
||||||
for: 1m
|
for: 1m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
summary: Blackbox probe slow HTTP (instance {{ $labels.instance }})
|
summary: Blackbox probe slow HTTP (instance {{ $labels.instance }})
|
||||||
description: "HTTP request took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
description: "HTTP request took more than 2s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
- alert: BlackboxProbeSlowPing
|
- alert: BlackboxProbeSlowPing
|
||||||
expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1
|
expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 2
|
||||||
for: 1m
|
for: 1m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
summary: Blackbox probe slow ping (instance {{ $labels.instance }})
|
summary: Blackbox probe slow ping (instance {{ $labels.instance }})
|
||||||
description: "Blackbox ping took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
description: "Blackbox ping took more than 2s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
Loading…
Reference in New Issue
Block a user