Role service-prometheus: make alert rules less verbose
This commit is contained in:
parent
7b9d3352e8
commit
ab252b3776
2 changed files with 18 additions and 4 deletions
|
@ -2,14 +2,14 @@ groups:
|
|||
- name: blackbox
|
||||
rules:
|
||||
- alert: Icmp4Timeout
  # Fires when the ICMPv4 probe fails, unless an InstanceDown alert is already
  # pending or firing for the same hostname.
  # ALERTS samples always have the value 1 while an alert is active and the
  # series does not exist otherwise, so comparing it with `== 0` never matches;
  # `unless` is the operator that expresses "no such alert exists".
  # Assumes the InstanceDown series carry the `hostname` label - confirm.
  # The second operand keeps the alert unconditional for the three hosts named
  # in the regex.
  expr: >-
    (probe_success{job="icmp4"} == 0
    unless on(hostname) ALERTS{alertname="InstanceDown"})
    or
    (probe_success{job="icmp4",hostname=~"zuckerwatte|aubergine|glueckskeks"} == 0)
  for: 5m
  annotations:
    description: 'ICMP requests to the primary IPv4 address timed out'
    summary: 'Instance {{ $labels.instance }} does not respond to ICMPv4 echo requests'
|
||||
|
||||
- alert: Icmp6Timeout
  # Fires when the ICMPv6 probe fails, unless an InstanceDown alert is already
  # pending or firing for the same hostname.
  # ALERTS samples always have the value 1 while an alert is active and the
  # series does not exist otherwise, so comparing it with `== 0` never matches;
  # `unless` is the operator that expresses "no such alert exists".
  # Assumes the InstanceDown series carry the `hostname` label - confirm.
  # The second operand keeps the alert unconditional for the three hosts named
  # in the regex.
  expr: >-
    (probe_success{job="icmp6"} == 0
    unless on(hostname) ALERTS{alertname="InstanceDown"})
    or
    (probe_success{job="icmp6",hostname=~"zuckerwatte|aubergine|glueckskeks"} == 0)
  for: 5m
  annotations:
    description: 'ICMP requests to the primary IPv6 address timed out'
|
||||
|
@ -25,7 +25,7 @@ groups:
|
|||
summary: 'Instance {{ $labels.instance }} is down'
|
||||
|
||||
- alert: ExporterDown
  # Fires when a scrape target of any job other than "node" is down, unless an
  # InstanceDown alert is already pending or firing for the same hostname.
  # ALERTS samples always have the value 1 while an alert is active and the
  # series does not exist otherwise, so `and ... ALERTS{...} == 0` yields an
  # empty result and the alert could never fire; `unless` expresses the
  # intended "no InstanceDown alert exists" condition.
  # Assumes the InstanceDown series carry the `hostname` label - confirm.
  expr: >-
    up{job!="node"} == 0
    unless on(hostname) ALERTS{alertname="InstanceDown"}
  for: 5m
  annotations:
    description: 'An exporter is down for more than 5 minutes'
|
||||
|
@ -104,7 +104,7 @@ groups:
|
|||
- name: fastd
|
||||
rules:
|
||||
- alert: FastdNoTraffic
  # Zero TX byte rate on a fastd interface (interfaces whose name ends in
  # "-1312" are excluded), restricted to hostnames whose node job reports more
  # than 30 minutes since boot.
  # The folded scalar joins the lines below with single spaces.
  expr: >-
    irate(fastd_tx_bytes{interface!~".*-1312"}[5m]) == 0
    and ON(hostname)
    (time() - node_boot_time_seconds{job="node"}) / 60 > 30
  for: 5m
  annotations:
    description: 'No TX data was seen on a fastd interface for more than 5 minutes'
|
||||
|
|
|
@ -37,6 +37,11 @@ scrape_configs:
|
|||
labels:
|
||||
group: '{{ group }}'
|
||||
{% endfor %}
|
||||
relabel_configs:
  # Copy the run of lowercase letters that precedes the first dot of the
  # scrape address into a `hostname` label. Addresses that do not match the
  # pattern get no `hostname` label from this rule.
  - action: replace
    source_labels:
      - __address__
    regex: '([a-z]+)\..*'
    replacement: '$1'
    target_label: hostname
|
||||
|
||||
- job_name: "fastd"
|
||||
scheme: "https"
|
||||
|
@ -45,6 +50,11 @@ scrape_configs:
|
|||
{% for host in groups['ffmwu-gateways'] %}
|
||||
- '{{ host }}:9281'
|
||||
{% endfor %}
|
||||
relabel_configs:
  # Copy the run of lowercase letters that precedes the first dot of the
  # scrape address into a `hostname` label. Addresses that do not match the
  # pattern get no `hostname` label from this rule.
  - action: replace
    source_labels:
      - __address__
    regex: '([a-z]+)\..*'
    replacement: '$1'
    target_label: hostname
|
||||
|
||||
{% for job in ['icmp4','icmp6'] %}
|
||||
- job_name: "{{ job }}"
|
||||
|
@ -74,5 +84,9 @@ scrape_configs:
|
|||
target_label: instance
|
||||
# Send the scrape request to 127.0.0.1:9115 instead of the probed target
# (presumably a local blackbox exporter - confirm).
- replacement: '127.0.0.1:9115'
  target_label: __address__
# Copy the run of lowercase letters that precedes the first dot of the probe
# target into a `hostname` label. Targets that do not match the pattern get
# no `hostname` label from this rule.
- action: replace
  source_labels:
    - __param_target
  regex: '([a-z]+)\..*'
  replacement: '$1'
  target_label: hostname
|
||||
|
||||
{% endfor %}
|
||||
|
|
Loading…
Reference in a new issue