Role service-prometheus: make alert rules less verbose
This commit is contained in:
parent
7b9d3352e8
commit
ab252b3776
2 changed files with 18 additions and 4 deletions
|
@ -2,14 +2,14 @@ groups:
|
||||||
- name: blackbox
|
- name: blackbox
|
||||||
rules:
|
rules:
|
||||||
- alert: Icmp4Timeout
|
- alert: Icmp4Timeout
|
||||||
expr: probe_success{job="icmp4"} == 0
|
# Fire on failed ICMPv4 probes, except for hosts that already have an active
# InstanceDown alert. ALERTS series only exist (with value 1) while an alert is
# pending or firing, so "== 0" never matches; "unless" expresses the exclusion.
# The listed hosts always alert, independent of InstanceDown.
expr: (probe_success{job="icmp4"} == 0 unless on(hostname) ALERTS{alertname="InstanceDown"}) or (probe_success{job="icmp4",hostname=~"zuckerwatte|aubergine|glueckskeks"} == 0)
|
||||||
for: 5m
|
for: 5m
|
||||||
annotations:
|
annotations:
|
||||||
description: 'ICMP requests to the primary IPv4 address timed out'
|
description: 'ICMP requests to the primary IPv4 address timed out'
|
||||||
summary: 'Instance {{ $labels.instance }} does not respond to ICMPv4 echo requests'
|
summary: 'Instance {{ $labels.instance }} does not respond to ICMPv4 echo requests'
|
||||||
|
|
||||||
- alert: Icmp6Timeout
|
- alert: Icmp6Timeout
|
||||||
expr: probe_success{job="icmp6"} == 0
|
# Fire on failed ICMPv6 probes, except for hosts that already have an active
# InstanceDown alert. ALERTS series only exist (with value 1) while an alert is
# pending or firing, so "== 0" never matches; "unless" expresses the exclusion.
# The listed hosts always alert, independent of InstanceDown.
expr: (probe_success{job="icmp6"} == 0 unless on(hostname) ALERTS{alertname="InstanceDown"}) or (probe_success{job="icmp6",hostname=~"zuckerwatte|aubergine|glueckskeks"} == 0)
|
||||||
for: 5m
|
for: 5m
|
||||||
annotations:
|
annotations:
|
||||||
description: 'ICMP requests to the primary IPv6 address timed out'
|
description: 'ICMP requests to the primary IPv6 address timed out'
|
||||||
|
@ -25,7 +25,7 @@ groups:
|
||||||
summary: 'Instance {{ $labels.instance }} is down'
|
summary: 'Instance {{ $labels.instance }} is down'
|
||||||
|
|
||||||
- alert: ExporterDown
|
- alert: ExporterDown
|
||||||
expr: up{job!="node"} == 0
|
# Fire for any non-node exporter that is down, except on hosts that already
# have an active InstanceDown alert. ALERTS series only exist (with value 1)
# while an alert is pending or firing, so comparing them with "== 0" yields an
# empty vector and the rule could never fire; "unless" expresses the exclusion.
expr: up{job!="node"} == 0 unless on(hostname) ALERTS{alertname="InstanceDown"}
|
||||||
for: 5m
|
for: 5m
|
||||||
annotations:
|
annotations:
|
||||||
description: 'An exporter is down for more than 5 minutes'
|
description: 'An exporter is down for more than 5 minutes'
|
||||||
|
@ -104,7 +104,7 @@ groups:
|
||||||
- name: fastd
|
- name: fastd
|
||||||
rules:
|
rules:
|
||||||
- alert: FastdNoTraffic
|
- alert: FastdNoTraffic
|
||||||
expr: irate(fastd_tx_bytes{interface!~".*-1312"}[5m]) == 0
|
# No TX traffic on a fastd interface (interfaces ending in -1312 excluded),
# restricted to hosts whose node exporter reports more than 30 minutes of uptime.
expr: (irate(fastd_tx_bytes{interface!~".*-1312"}[5m]) == 0) and on(hostname) (((time() - node_boot_time_seconds{job="node"}) / 60) > 30)
|
||||||
for: 5m
|
for: 5m
|
||||||
annotations:
|
annotations:
|
||||||
description: 'No TX data was seen on a fastd interface for more than 5 minutes'
|
description: 'No TX data was seen on a fastd interface for more than 5 minutes'
|
||||||
|
|
|
@ -37,6 +37,11 @@ scrape_configs:
|
||||||
labels:
|
labels:
|
||||||
group: '{{ group }}'
|
group: '{{ group }}'
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__address__]
|
||||||
|
regex: '([a-z]+)\..*'
|
||||||
|
replacement: '$1'
|
||||||
|
target_label: hostname
|
||||||
|
|
||||||
- job_name: "fastd"
|
- job_name: "fastd"
|
||||||
scheme: "https"
|
scheme: "https"
|
||||||
|
@ -45,6 +50,11 @@ scrape_configs:
|
||||||
{% for host in groups['ffmwu-gateways'] %}
|
{% for host in groups['ffmwu-gateways'] %}
|
||||||
- '{{ host }}:9281'
|
- '{{ host }}:9281'
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__address__]
|
||||||
|
regex: '([a-z]+)\..*'
|
||||||
|
replacement: '$1'
|
||||||
|
target_label: hostname
|
||||||
|
|
||||||
{% for job in ['icmp4','icmp6'] %}
|
{% for job in ['icmp4','icmp6'] %}
|
||||||
- job_name: "{{ job }}"
|
- job_name: "{{ job }}"
|
||||||
|
@ -74,5 +84,9 @@ scrape_configs:
|
||||||
target_label: instance
|
target_label: instance
|
||||||
- target_label: __address__
|
- target_label: __address__
|
||||||
replacement: 127.0.0.1:9115
|
replacement: 127.0.0.1:9115
|
||||||
|
- source_labels: [__param_target]
|
||||||
|
regex: '([a-z]+)\..*'
|
||||||
|
replacement: '$1'
|
||||||
|
target_label: hostname
|
||||||
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
Loading…
Reference in a new issue