From ab252b377667d83d9fa208e10f9fc5531308b38e Mon Sep 17 00:00:00 2001
From: Julian Labus
Date: Mon, 10 Dec 2018 12:47:27 +0100
Subject: [PATCH] Role service-prometheus: make alert rules less verbose

Suppress the ICMP and exporter alerts for a host while InstanceDown is
already active for it, and only report idle fastd interfaces once the
host has been up for more than 30 minutes. The alerts are correlated
through a new "hostname" label that relabeling derives from the leading
lowercase-letter part of the target address, up to the first dot.

The hosts zuckerwatte, aubergine and glueckskeks keep alerting on ICMP
timeouts unconditionally.

The suppression is written as "unless on(hostname) ALERTS{...}":
Prometheus 2.x only exposes ALERTS series (with value 1) while an alert
is pending or firing, so comparing ALERTS with "== 0" never matches and
would keep the guarded alerts from ever firing.

---
 playbooks/prometheus/alert.rules       |  8 ++++----
 playbooks/prometheus/prometheus.yml.j2 | 14 ++++++++++++++
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/playbooks/prometheus/alert.rules b/playbooks/prometheus/alert.rules
index a329be4..e337b53 100644
--- a/playbooks/prometheus/alert.rules
+++ b/playbooks/prometheus/alert.rules
@@ -2,14 +2,14 @@ groups:
 - name: blackbox
   rules:
   - alert: Icmp4Timeout
-    expr: probe_success{job="icmp4"} == 0
+    expr: (probe_success{job="icmp4"} == 0 unless on(hostname) ALERTS{alertname="InstanceDown"}) or (probe_success{job="icmp4",hostname=~"zuckerwatte|aubergine|glueckskeks"} == 0)
     for: 5m
     annotations:
       description: 'ICMP requests to the primary IPv4 address timed out'
       summary: 'Instance {{ $labels.instance }} does not respond to ICMPv4 echo requests'
 
   - alert: Icmp6Timeout
-    expr: probe_success{job="icmp6"} == 0
+    expr: (probe_success{job="icmp6"} == 0 unless on(hostname) ALERTS{alertname="InstanceDown"}) or (probe_success{job="icmp6",hostname=~"zuckerwatte|aubergine|glueckskeks"} == 0)
     for: 5m
     annotations:
       description: 'ICMP requests to the primary IPv6 address timed out'
@@ -25,7 +25,7 @@ groups:
       summary: 'Instance {{ $labels.instance }} is down'
 
   - alert: ExporterDown
-    expr: up{job!="node"} == 0
+    expr: up{job!="node"} == 0 unless on(hostname) ALERTS{alertname="InstanceDown"}
     for: 5m
     annotations:
       description: 'An exporter is down for more than 5 minutes'
@@ -104,7 +104,7 @@ groups:
 - name: fastd
   rules:
   - alert: FastdNoTraffic
-    expr: irate(fastd_tx_bytes{interface!~".*-1312"}[5m]) == 0
+    expr: irate(fastd_tx_bytes{interface!~".*-1312"}[5m]) == 0 and ON(hostname) (time() - node_boot_time_seconds{job="node"}) / 60 > 30
     for: 5m
     annotations:
       description: 'No TX data was seen on a fastd interface for more than 5 minutes'
diff --git a/playbooks/prometheus/prometheus.yml.j2 b/playbooks/prometheus/prometheus.yml.j2
index a0345e3..a3b1950 100644
--- a/playbooks/prometheus/prometheus.yml.j2
+++ b/playbooks/prometheus/prometheus.yml.j2
@@ -37,6 +37,11 @@ scrape_configs:
         labels:
           group: '{{ group }}'
 {% endfor %}
+    relabel_configs:
+      - source_labels: [__address__]
+        regex: '([a-z]+)\..*'
+        replacement: '$1'
+        target_label: hostname
 
   - job_name: "fastd"
     scheme: "https"
@@ -45,6 +50,11 @@ scrape_configs:
 {% for host in groups['ffmwu-gateways'] %}
         - '{{ host }}:9281'
 {% endfor %}
+    relabel_configs:
+      - source_labels: [__address__]
+        regex: '([a-z]+)\..*'
+        replacement: '$1'
+        target_label: hostname
 
 {% for job in ['icmp4','icmp6'] %}
   - job_name: "{{ job }}"
@@ -74,5 +84,9 @@ scrape_configs:
         target_label: instance
       - target_label: __address__
         replacement: 127.0.0.1:9115
+      - source_labels: [__param_target]
+        regex: '([a-z]+)\..*'
+        replacement: '$1'
+        target_label: hostname
 
 {% endfor %}