diff --git a/inventory/ffmwu-monitoring b/inventory/ffmwu-monitoring
new file mode 100644
index 0000000..fda501a
--- /dev/null
+++ b/inventory/ffmwu-monitoring
@@ -0,0 +1 @@
+[ffmwu-monitoring]
diff --git a/inventory/group_vars/all b/inventory/group_vars/all
index a89ccef..4cbe3a7 100644
--- a/inventory/group_vars/all
+++ b/inventory/group_vars/all
@@ -12,6 +12,12 @@ bgp_ipv4_transfer_net: 10.37.0.0/18
 bgp_ipv6_transfer_net: fd37:b4dc:4b1e::/64
 bgp_groups:
   - ffmwu-gateways
+  - ffmwu-monitoring
+
+prometheus_groups:  # inventory groups rendered as static scrape targets by prometheus.yml.j2
+  - ffmwu-gateways
+
+prometheus_conf_main: prometheus/prometheus.yml.j2
 
 http_domain_internal: ffmwu.org
 http_domain_external: freifunk-mwu.de
diff --git a/inventory/group_vars/ffmwu-monitoring b/inventory/group_vars/ffmwu-monitoring
new file mode 100644
index 0000000..54106b6
--- /dev/null
+++ b/inventory/group_vars/ffmwu-monitoring
@@ -0,0 +1,13 @@
+---
+routing_tables:
+  mwu: 41
+
+common_repos:
+  backend-scripts:
+    repo_url: https://github.com/freifunk-mwu/backend-scripts.git
+    version: ansible
+
+prometheus_components:
+  - prometheus
+  - alertmanager
+  - node_exporter
diff --git a/playbooks/monitoring.yml b/playbooks/monitoring.yml
new file mode 100755
index 0000000..958b45c
--- /dev/null
+++ b/playbooks/monitoring.yml
@@ -0,0 +1,26 @@
+#!/usr/bin/ansible-playbook
+
+- hosts: ffmwu-monitoring
+  remote_user: admin
+  roles:
+    - prerequisites
+    - server-apt-repos
+    - server-basic
+    - users
+    - system-sysctl
+    - git-repos
+    - service-haveged
+    - service-ntpd
+    - kmod-batman
+    - network-routetables
+    - network-batman
+    - network-meshbridge
+    - network-fastd
+    - network-routing
+    - service-nginx
+    - service-fastd
+    - service-fastd-backbone
+    - service-bird
+    - service-respondd
+    - service-nullmailer
+    - service-prometheus
diff --git a/playbooks/prometheus/prometheus.yml.j2 b/playbooks/prometheus/prometheus.yml.j2
new file mode 100644
index 0000000..fcb8992
--- /dev/null
+++ b/playbooks/prometheus/prometheus.yml.j2
@@ -0,0 +1,42 @@
+global:
+  scrape_interval: 15s  # By default, scrape targets every 15 seconds.
+  evaluation_interval: 15s  # Evaluate rules every 15 seconds.
+  # scrape_timeout is set to the global default (10s).
+
+  # The labels to add to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: 'master'
+
+{% if prometheus_rule_files is defined %}
+# rule_files lists the files from which rules are read.
+rule_files:
+  {% for (key, value) in prometheus_rule_files.items() %}
+  - {{ prometheus_rule_path }}/{{ value.dest }}
+  {% endfor %}
+{% endif %}
+
+# A list of scrape configurations.
+scrape_configs:
+
+  - job_name: 'prometheus'
+    scrape_interval: 10s
+    scrape_timeout: 10s
+    static_configs:
+      - targets: ['localhost:9090', 'localhost:9100']
+
+  - job_name: 'node'
+    file_sd_configs:
+      - files:
+        - '{{ prometheus_file_sd_config_path }}/*.json'
+        - '{{ prometheus_file_sd_config_path }}/*.yml'
+        - '{{ prometheus_file_sd_config_path }}/*.yaml'
+    static_configs:  # one entry per inventory group listed in prometheus_groups
+{% for group in prometheus_groups %}
+      - targets:  # every host of this inventory group, on port 9100
+{% for host in groups[group] %}
+        - '{{ host }}:9100'
+{% endfor %}
+        labels:
+          group: '{{ group }}'
+{% endfor %}