# Prometheus alerting rules for the "blackbox" rule group.
#
# Every rule attaches a `severity` label (warning / critical) and two
# annotations: a one-line `summary` naming the instance, and a `description`
# that dumps the sample value and the full label set.
#
# NOTE(review): the probe_* metrics are presumably exported by the Prometheus
# blackbox_exporter -- confirm against the scrape configuration.
groups:
  - name: blackbox
    rules:
      # Fires immediately (for: 0m) when a probe reports failure.
      - alert: BlackboxProbeFailed
        expr: probe_success == 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: "Blackbox probe failed (instance {{ $labels.instance }})"
          description: "Probe failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the 1-minute average probe duration has stayed above 5s
      # for one minute.
      - alert: BlackboxSlowProbe
        expr: avg_over_time(probe_duration_seconds[1m]) > 5
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Blackbox slow probe (instance {{ $labels.instance }})"
          description: "Blackbox probe took more than 5s to complete\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires immediately when the HTTP status code is outside 200-399.
      - alert: BlackboxProbeHttpFailure
        expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: "Blackbox probe HTTP failure (instance {{ $labels.instance }})"
          description: "HTTP status code is not 200-399\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Warning tier: between 3 and 30 days (86400 s per day) of validity left.
      # The lower bound keeps this rule from firing together with the critical
      # tier below. The chained comparison filters the remaining-seconds vector
      # twice, so $value is still the number of seconds until expiry.
      # This rule shares its alert name with the critical tier on purpose;
      # the `severity` label tells them apart.
      - alert: BlackboxSslCertificateWillExpireSoon
        expr: 86400 * 3 <= probe_ssl_earliest_cert_expiry - time() < 86400 * 30
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: "Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})"
          description: "SSL certificate expires in less than 30 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Critical tier: less than 3 days of validity left, but not yet expired.
      # The `0 <` bound hands already-expired certificates over to
      # BlackboxSslCertificateExpired instead of reporting them twice.
      - alert: BlackboxSslCertificateWillExpireSoon
        expr: 0 < probe_ssl_earliest_cert_expiry - time() < 86400 * 3
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: "Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})"
          description: "SSL certificate expires in less than 3 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires immediately once the earliest certificate expiry time is at or
      # before the current evaluation time.
      - alert: BlackboxSslCertificateExpired
        expr: probe_ssl_earliest_cert_expiry - time() <= 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: "Blackbox SSL certificate expired (instance {{ $labels.instance }})"
          description: "SSL certificate has expired already\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the 1-minute average HTTP duration has stayed above 1s
      # for one minute.
      - alert: BlackboxProbeSlowHttp
        expr: avg_over_time(probe_http_duration_seconds[1m]) > 1
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Blackbox probe slow HTTP (instance {{ $labels.instance }})"
          description: "HTTP request took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the 1-minute average ICMP duration has stayed above 1s
      # for one minute.
      - alert: BlackboxProbeSlowPing
        expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Blackbox probe slow ping (instance {{ $labels.instance }})"
          description: "Blackbox ping took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"