Role service-prometheus: make alert rules less verbose

This commit is contained in:
Julian Labus 2018-12-10 12:47:27 +01:00
parent 7b9d3352e8
commit ab252b3776
No known key found for this signature in database
GPG key ID: 8AF209F2C6B3572A
2 changed files with 18 additions and 4 deletions

View file

@ -2,14 +2,14 @@ groups:
- name: blackbox
rules:
# Fires when the ICMPv4 blackbox probe of a host has failed for 5 minutes,
# unless an InstanceDown alert is already active for the same hostname
# (presumably so that one dead machine does not produce two alerts).
- alert: Icmp4Timeout
  # Expression before the change (alerted on every failed probe):
  #   probe_success{job="icmp4"} == 0
  #
  # ALERTS series exist only while an alert is pending or firing, and their
  # sample value is always 1. A filter of the form
  #   ... and on(hostname) ALERTS{alertname="InstanceDown"} == 0
  # therefore never matches anything. "unless" expresses the intended
  # "no active InstanceDown for this hostname" condition.
  # The second operand keeps the listed hosts alerting regardless of
  # InstanceDown.
  expr: (probe_success{job="icmp4"} == 0 unless on(hostname) ALERTS{alertname="InstanceDown"}) or (probe_success{job="icmp4",hostname=~"zuckerwatte|aubergine|glueckskeks"} == 0)
  for: 5m
  annotations:
    description: 'ICMP requests to the primary IPv4 address timed out'
    summary: 'Instance {{ $labels.instance }} does not respond to ICMPv4 echo requests'
# Fires when the ICMPv6 blackbox probe of a host has failed for 5 minutes,
# unless an InstanceDown alert is already active for the same hostname
# (presumably so that one dead machine does not produce two alerts).
- alert: Icmp6Timeout
  # Expression before the change (alerted on every failed probe):
  #   probe_success{job="icmp6"} == 0
  #
  # ALERTS series exist only while an alert is pending or firing, and their
  # sample value is always 1. A filter of the form
  #   ... and on(hostname) ALERTS{alertname="InstanceDown"} == 0
  # therefore never matches anything. "unless" expresses the intended
  # "no active InstanceDown for this hostname" condition.
  # The second operand keeps the listed hosts alerting regardless of
  # InstanceDown.
  expr: (probe_success{job="icmp6"} == 0 unless on(hostname) ALERTS{alertname="InstanceDown"}) or (probe_success{job="icmp6",hostname=~"zuckerwatte|aubergine|glueckskeks"} == 0)
  for: 5m
  annotations:
    description: 'ICMP requests to the primary IPv6 address timed out'
@ -25,7 +25,7 @@ groups:
summary: 'Instance {{ $labels.instance }} is down'
# Fires when a scrape target of any job other than "node" has been down for
# 5 minutes, unless an InstanceDown alert is already active for the same
# hostname.
- alert: ExporterDown
  # Expression before the change (alerted on every down exporter):
  #   up{job!="node"} == 0
  #
  # ALERTS series exist only while an alert is pending or firing, and their
  # sample value is always 1. Filtering with
  #   ... and on(hostname) ALERTS{alertname="InstanceDown"} == 0
  # would make this rule unable to fire at all. "unless" drops only those
  # targets whose hostname currently has an active InstanceDown alert;
  # targets with no matching ALERTS series still alert.
  expr: up{job!="node"} == 0 unless on(hostname) ALERTS{alertname="InstanceDown"}
  for: 5m
  annotations:
    description: 'An exporter is down for more than 5 minutes'
@ -104,7 +104,7 @@ groups:
- name: fastd
rules:
# Alerts when a fastd interface shows a TX byte rate of zero for 5 minutes.
# Interfaces whose name ends in "-1312" are excluded by the label matcher.
- alert: FastdNoTraffic
# Expression before the change: any zero TX rate triggers the alert.
expr: irate(fastd_tx_bytes{interface!~".*-1312"}[5m]) == 0
# Expression after the change: additionally require, joined on the hostname
# label, that the node job reports a boot time more than 30 minutes in the
# past ((now - boot time) / 60 > 30) — presumably to stay quiet right after
# a reboot.
expr: irate(fastd_tx_bytes{interface!~".*-1312"}[5m]) == 0 and ON(hostname) (time() - node_boot_time_seconds{job="node"}) / 60 > 30
for: 5m
annotations:
description: 'No TX data was seen on a fastd interface for more than 5 minutes'

View file

@ -37,6 +37,11 @@ scrape_configs:
labels:
group: '{{ group }}'
{% endfor %}
# Derive a short "hostname" label from the scrape address: the capture group
# takes the leading run of lowercase letters before the first dot and writes
# it ($1) to the hostname label. The alert rules join on this label via
# on(hostname).
# NOTE(review): addresses whose first DNS label contains digits or hyphens do
# not fit [a-z]+ — presumably they end up without a hostname label; verify.
relabel_configs:
- source_labels: [__address__]
regex: '([a-z]+)\..*'
replacement: '$1'
target_label: hostname
- job_name: "fastd"
scheme: "https"
@ -45,6 +50,11 @@ scrape_configs:
{% for host in groups['ffmwu-gateways'] %}
- '{{ host }}:9281'
{% endfor %}
# Derive a short "hostname" label for this job's targets from the scrape
# address: the capture group takes the leading run of lowercase letters
# before the first dot and writes it ($1) to the hostname label.
# NOTE(review): addresses whose first DNS label contains digits or hyphens do
# not fit [a-z]+ — presumably they end up without a hostname label; verify.
relabel_configs:
- source_labels: [__address__]
regex: '([a-z]+)\..*'
replacement: '$1'
target_label: hostname
{% for job in ['icmp4','icmp6'] %}
- job_name: "{{ job }}"
@ -74,5 +84,9 @@ scrape_configs:
target_label: instance
- target_label: __address__
replacement: 127.0.0.1:9115
# Derive the "hostname" label from the probe target parameter rather than
# from __address__, which the preceding rule sets to 127.0.0.1:9115.
# The capture group takes the leading run of lowercase letters before the
# first dot.
- source_labels: [__param_target]
regex: '([a-z]+)\..*'
replacement: '$1'
target_label: hostname
{% endfor %}