From ad5b658467604b73c2a5630283244d89a7296798 Mon Sep 17 00:00:00 2001 From: n0trax Date: Tue, 5 Dec 2017 05:58:34 +0100 Subject: [PATCH] Add prometheus role (#9) --- roles/service-prometheus/LICENSE | 22 ++ roles/service-prometheus/README.md | 239 ++++++++++++++++++ roles/service-prometheus/defaults/main.yml | 32 +++ .../service-prometheus/files/alertmanager.yml | 13 + roles/service-prometheus/handlers/main.yml | 9 + .../service-prometheus/tasks/alertmanager.yml | 61 +++++ .../service-prometheus/tasks/install-gosu.yml | 19 ++ roles/service-prometheus/tasks/main.yml | 51 ++++ .../tasks/node-exporter.yml | 40 +++ roles/service-prometheus/tasks/prometheus.yml | 79 ++++++ .../templates/alertmanager.service.j2 | 18 ++ .../templates/node_exporter.service.j2 | 18 ++ .../templates/prometheus.service.j2 | 18 ++ .../templates/prometheus.yml.j2 | 36 +++ 14 files changed, 655 insertions(+) create mode 100644 roles/service-prometheus/LICENSE create mode 100644 roles/service-prometheus/README.md create mode 100644 roles/service-prometheus/defaults/main.yml create mode 100644 roles/service-prometheus/files/alertmanager.yml create mode 100644 roles/service-prometheus/handlers/main.yml create mode 100644 roles/service-prometheus/tasks/alertmanager.yml create mode 100644 roles/service-prometheus/tasks/install-gosu.yml create mode 100644 roles/service-prometheus/tasks/main.yml create mode 100644 roles/service-prometheus/tasks/node-exporter.yml create mode 100644 roles/service-prometheus/tasks/prometheus.yml create mode 100644 roles/service-prometheus/templates/alertmanager.service.j2 create mode 100644 roles/service-prometheus/templates/node_exporter.service.j2 create mode 100644 roles/service-prometheus/templates/prometheus.service.j2 create mode 100644 roles/service-prometheus/templates/prometheus.yml.j2 diff --git a/roles/service-prometheus/LICENSE b/roles/service-prometheus/LICENSE new file mode 100644 index 0000000..9aba21a --- /dev/null +++ b/roles/service-prometheus/LICENSE 
@@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 William Yeh + + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/roles/service-prometheus/README.md b/roles/service-prometheus/README.md new file mode 100644 index 0000000..2b7d0b2 --- /dev/null +++ b/roles/service-prometheus/README.md @@ -0,0 +1,239 @@ + +FFMWU prometheus +============ + + +## Summary + +Prometheus ansible role based on **[williamyeh.prometheus](https://galaxy.ansible.com/williamyeh/prometheus/)** + +This Ansible role has the following features for [Prometheus](http://prometheus.io/): + + - Install specific versions of [Prometheus server](https://github.com/prometheus/prometheus), [Node exporter](https://github.com/prometheus/node_exporter), [Alertmanager](https://github.com/prometheus/alertmanager). + - Handlers for restart/reload/stop events; + - Bare bone configuration (*real* configuration should be left to user's template files; see **Usage** section below). 
+ +## Role Variables + + +### Mandatory variables + +The components to be installed: + +```yaml +# Supported components: +# +# [Server components] +# - "prometheus" +# - "alertmanager" +# +# [Exporter components] +# - "node_exporter" +# +prometheus_components +``` + + + +### Optional variables: general settings + + +User-configurable defaults: + +```yaml +# user and group +prometheus_user: prometheus +prometheus_group: prometheus + + +# directory for executable files +prometheus_install_path: /opt/prometheus + +# directory for configuration files +prometheus_config_path: /etc/prometheus + +# directory for PID files +prometheus_pid_path: /var/run/prometheus + +# directory for temporary files +prometheus_download_path: /tmp + +# version of helper utility "gosu" +gosu_version: "1.10" +``` + +### Optional variables: Prometheus server + +User-configurable defaults: + +```yaml +# which version? +prometheus_version: 2.0.0 + +# directory for rule files +prometheus_rule_path: "{{ prometheus_config_path }}/rules" + +# directory for file_sd files +prometheus_file_sd_config_path: "{{ prometheus_config_path }}/tgroups" + +# directory for runtime database +prometheus_db_path: /var/lib/prometheus +``` + + + + + + +User-installable configuration file (see [doc](http://prometheus.io/docs/operating/configuration/) for details): + + +```yaml +# main conf template relative to `playbook_dir`; +# to be installed to "{{ prometheus_config_path }}/prometheus.yml" +prometheus_conf_main +``` + + +User-installable rule files (see [doc](http://prometheus.io/docs/alerting/rules/) for details): + + +```yaml +# rule files to be installed to "{{ prometheus_rule_path }}" directory; +# dict fields: +# - key: memo for this rule +# - value: +# - src: file relative to `playbook_dir` +# - dest: target file relative to `{{ prometheus_rule_path }}` +prometheus_rule_files +``` + + +Additional command-line arguments, if any (use `prometheus --help` to see the full list of arguments): + +```yaml +prometheus_opts 
+``` + + +### Optional variables: Node exporter + + +User-configurable defaults: + +```yaml +# which version? +node_exporter_version: 0.15.1 +``` + +Additional command-line arguments, if any (use `node_exporter --help` to see the full list of arguments): + +```yaml +node_exporter_opts +``` + + +### Optional variables: Alertmanager + + +User-configurable defaults: + +```yaml +# which version? +alertmanager_version: 0.10.0 + +# directory for runtime database (currently for `silences.json`) +alertmanager_db_path: /var/lib/alertmanager +``` + +User-installable alertmanager conf file (see [doc](http://prometheus.io/docs/alerting/alertmanager/) for details): +The role installs the `alertmanager.yml` found in its `files` directory; edit that file to customise it. + + +Additional command-line arguments, if any (use `alertmanager --help` to see the full list of arguments): + +```yaml +alertmanager_opts +``` + + + + +## Handlers + +Prometheus server: + +- `reload prometheus` + +Alertmanager: + +- `reload alertmanager` + + +## Usage + + +### Step 1: add role + +Add role name `service-prometheus` to your playbook file. + + +### Step 2: add variables + +Set vars in your playbook file, if necessary. + +Simple example: + +```yaml +--- +# file: simple-playbook.yml + +- hosts: all + become: True + roles: + - service-prometheus + + vars: + prometheus_components: [ "prometheus", "alertmanager" ] +``` + + +### Step 3: copy user's config files, if necessary + + +More practical example: + +```yaml +--- +# file: complex-playbook.yml + +- hosts: all + become: True + roles: + - service-prometheus + + vars: + prometheus_components: + - prometheus + - node_exporter + - alertmanager + + prometheus_rule_files: + this_is_rule_1_InstanceDown: + src: some/path/basic.rules + dest: basic.rules +``` + + +### Step 4: browse the default Prometheus pages + +Open the page in your browser: + +- Prometheus - `http://HOST:9090` or `http://HOST:9090/consoles/node.html` + +- Alertmanager - `http://HOST:9093` + + +## License + +MIT License. 
See the [LICENSE file](LICENSE) for details. diff --git a/roles/service-prometheus/defaults/main.yml b/roles/service-prometheus/defaults/main.yml new file mode 100644 index 0000000..c639e3e --- /dev/null +++ b/roles/service-prometheus/defaults/main.yml @@ -0,0 +1,32 @@ +--- +# +# user-configurable defaults +# + +prometheus_components: + - "node_exporter" + +prometheus_user: prometheus +prometheus_group: prometheus + +prometheus_version: 2.0.0 +node_exporter_version: 0.15.1 +alertmanager_version: 0.10.0 + +gosu_version: "1.10" + +prometheus_install_path: /opt/prometheus +prometheus_config_path: /etc/prometheus +prometheus_rule_path: "{{ prometheus_config_path }}/rules" +prometheus_file_sd_config_path: "{{ prometheus_config_path }}/tgroups" +prometheus_db_path: /var/lib/prometheus +alertmanager_db_path: /var/lib/alertmanager +prometheus_pid_path: /var/run/prometheus + +prometheus_download_path: /tmp +prometheus_workdir: "{{ prometheus_download_path }}/prometheus_workdir" +prometheus_goroot: "{{ prometheus_workdir }}/go" +prometheus_gopath: "{{ prometheus_workdir }}/gopath" + +prometheus_default_opts: "--config.file={{ prometheus_config_path }}/prometheus.yml --storage.tsdb.path={{ prometheus_db_path }}" +alertmanager_default_opts: "-config.file={{ prometheus_config_path }}/alertmanager.yml -storage.path={{ alertmanager_db_path }}" diff --git a/roles/service-prometheus/files/alertmanager.yml b/roles/service-prometheus/files/alertmanager.yml new file mode 100644 index 0000000..abab519 --- /dev/null +++ b/roles/service-prometheus/files/alertmanager.yml @@ -0,0 +1,13 @@ +global: + +route: + group_by: ['alertname', 'cluster'] + group_wait: 30s + group_interval: 5m + repeat_interval: 3h + receiver: 'default-pager' + +receivers: + - name: 'default-pager' + pagerduty_configs: + - service_key: diff --git a/roles/service-prometheus/handlers/main.yml b/roles/service-prometheus/handlers/main.yml new file mode 100644 index 0000000..a293535 --- /dev/null +++ 
b/roles/service-prometheus/handlers/main.yml @@ -0,0 +1,9 @@ +- name: reload prometheus + service: + name: prometheus + state: reloaded + +- name: reload alertmanager + service: + name: alertmanager + state: reloaded diff --git a/roles/service-prometheus/tasks/alertmanager.yml b/roles/service-prometheus/tasks/alertmanager.yml new file mode 100644 index 0000000..3436a94 --- /dev/null +++ b/roles/service-prometheus/tasks/alertmanager.yml @@ -0,0 +1,61 @@ +--- +# Install Prometheus alertmanager. + +- name: set internal variables, part 1 + set_fact: + alertmanager_signature: "alertmanager-{{ alertmanager_version }}.linux-amd64" + +- name: set internal variables, part 2 + set_fact: + alertmanager_daemon_dir: "{{ prometheus_install_path }}/{{ alertmanager_signature }}" + +- name: set download url + set_fact: + alertmanager_tarball_url: "https://github.com/prometheus/alertmanager/releases/download/v{{ alertmanager_version }}/{{ alertmanager_signature }}.tar.gz" + +- name: download and uncompress alertmanager tarball + unarchive: + src: "{{ alertmanager_tarball_url }}" + dest: "{{ prometheus_install_path }}" + copy: no + owner: "{{ prometheus_user }}" + group: "{{ prometheus_group }}" + mode: "go-w" + creates: "{{ alertmanager_daemon_dir }}" + +- name: create alertmanager /usr/local/bin links + file: + src: "{{ alertmanager_daemon_dir }}/{{ item }}" + dest: "/usr/local/bin/{{ item }}" + state: link + with_items: + - "alertmanager" + - "amtool" + +- name: mkdir for alertmanager data (silences.json for now) + file: + path: "{{ item }}" + state: directory + owner: "{{ prometheus_user }}" + group: "{{ prometheus_group }}" + mode: "u=rwx,g=rx,o=" + with_items: + - "{{ alertmanager_db_path }}" + +- name: copy alertmanager systemd config + template: + src: "alertmanager.service.j2" + dest: "/lib/systemd/system/alertmanager.service" + +- name: install alertmanager config file + copy: + src: "alertmanager.yml" + dest: "{{ prometheus_config_path }}/alertmanager.yml" + notify: + - 
reload alertmanager + +- name: enable alertmanager service + service: + name: alertmanager + enabled: yes + state: started diff --git a/roles/service-prometheus/tasks/install-gosu.yml b/roles/service-prometheus/tasks/install-gosu.yml new file mode 100644 index 0000000..78c686a --- /dev/null +++ b/roles/service-prometheus/tasks/install-gosu.yml @@ -0,0 +1,19 @@ +# Install "gosu" utility. +# +# @see https://github.com/tianon/gosu +# + +- name: set internal variables for convenience + set_fact: + gosu_bin_url: "https://github.com/tianon/gosu/releases/download/{{ gosu_version }}/gosu-amd64" + +- name: download gosu executable + get_url: + url: "{{ gosu_bin_url }}" + dest: "/usr/local/bin/gosu" + +- name: add executable permission + file: + path: "/usr/local/bin/gosu" + state: file + mode: "a+x" diff --git a/roles/service-prometheus/tasks/main.yml b/roles/service-prometheus/tasks/main.yml new file mode 100644 index 0000000..4ed1092 --- /dev/null +++ b/roles/service-prometheus/tasks/main.yml @@ -0,0 +1,51 @@ +# Top-level installer for Prometheus. 
+ +- name: create Prometheus group + group: + name: "{{ prometheus_group }}" + state: present + +- name: create Prometheus user + user: + name: "{{ prometheus_user }}" + group: "{{ prometheus_group }}" + createhome: no + shell: /sbin/nologin + comment: "Prometheus User" + state: present + +- name: create base directories + file: + path: "{{ item }}" + state: directory + owner: "{{ prometheus_user }}" + group: "{{ prometheus_group }}" + mode: "u=rwx,g=rx,o=" + with_items: + - "{{ prometheus_install_path }}" + - "{{ prometheus_config_path }}" + - "{{ prometheus_pid_path }}" + +- name: install helper utility "gosu" + include_tasks: install-gosu.yml + +- name: install and configure prometheus service + include_tasks: prometheus.yml + when: '"prometheus" in prometheus_components' + +- name: install and configure node-exporter service + include_tasks: node-exporter.yml + when: '"node_exporter" in prometheus_components' + +- name: install and configure alertmanager service + include_tasks: alertmanager.yml + when: '"alertmanager" in prometheus_components' + +- name: set {{ prometheus_install_path }} permissions, owner and group + file: + path: "{{ prometheus_install_path }}" + state: directory + owner: "{{ prometheus_user }}" + group: "{{ prometheus_group }}" + mode: "go-w" + recurse: yes diff --git a/roles/service-prometheus/tasks/node-exporter.yml b/roles/service-prometheus/tasks/node-exporter.yml new file mode 100644 index 0000000..cb55d53 --- /dev/null +++ b/roles/service-prometheus/tasks/node-exporter.yml @@ -0,0 +1,40 @@ +# Install Prometheus node-exporter. 
+# +# @see http://prometheus.io/docs/introduction/getting_started/ + +- name: set internal variables for convenience + set_fact: + node_exporter_daemon_dir: "{{ prometheus_install_path }}/node_exporter-{{ node_exporter_version }}.linux-amd64" + node_exporter_tarball_url: "https://github.com/prometheus/node_exporter/releases/download/v{{ node_exporter_version }}/node_exporter-{{ node_exporter_version }}.linux-amd64.tar.gz" + +- name: download and untar node_exporter tarball + unarchive: + src: "{{ node_exporter_tarball_url }}" + dest: "{{ prometheus_install_path }}" + copy: no + creates: "{{ node_exporter_daemon_dir }}" + +- name: create node_exporter /usr/local/bin link + file: + src: "{{ node_exporter_daemon_dir }}/node_exporter" + dest: "/usr/local/bin/node_exporter" + state: link + +- name: mkdir for data + file: + path: "{{ prometheus_db_path }}" + state: directory + owner: "{{ prometheus_user }}" + group: "{{ prometheus_group }}" + mode: "u=rwx,g=rx,o=" + +- name: copy systemd config to server + template: + src: "../templates/node_exporter.service.j2" + dest: "/lib/systemd/system/node_exporter.service" + +- name: enable node_exporter service + service: + name: node_exporter + enabled: yes + state: started diff --git a/roles/service-prometheus/tasks/prometheus.yml b/roles/service-prometheus/tasks/prometheus.yml new file mode 100644 index 0000000..53fae23 --- /dev/null +++ b/roles/service-prometheus/tasks/prometheus.yml @@ -0,0 +1,79 @@ +# Install Prometheus server. 
+# +# @see http://prometheus.io/docs/introduction/getting_started/ +# + +- name: set internal variables for convenience + set_fact: + prometheus_daemon_dir: "{{ prometheus_install_path }}/prometheus-{{ prometheus_version }}.linux-amd64" + prometheus_tarball_url: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-amd64.tar.gz" + +- name: set prometheus default options + set_fact: + prometheus_default_opts: "{{ prometheus_default_opts }} --web.console.templates={{ prometheus_daemon_dir }}/consoles --web.console.libraries={{ prometheus_daemon_dir }}/console_libraries" + +- name: download and untar prometheus tarball + unarchive: + src: "{{ prometheus_tarball_url }}" + dest: "{{ prometheus_install_path }}" + copy: no + creates: "{{ prometheus_daemon_dir }}" + +- name: create prometheus /usr/local/bin links + file: + src: "{{ prometheus_daemon_dir }}/{{ item }}" + dest: "/usr/local/bin/{{ item }}" + state: link + with_items: + - "prometheus" + - "promtool" + +- name: mkdir for config and data + file: + path: "{{ item }}" + state: directory + owner: "{{ prometheus_user }}" + group: "{{ prometheus_group }}" + mode: "u=rwx,g=rx,o=" + with_items: + - "{{ prometheus_rule_path }}" + - "{{ prometheus_file_sd_config_path }}" + - "{{ prometheus_db_path }}" + +- name: copy prometheus systemd config + template: + src: "prometheus.service.j2" + dest: "/lib/systemd/system/prometheus.service" + +- name: copy rule files from playbook's, if any + copy: + src: "{{ playbook_dir }}/{{ item.value.src }}" + dest: "{{ prometheus_rule_path }}/{{ item.value.dest }}" + validate: "{{ prometheus_daemon_dir }}/promtool check rules %s" + with_dict: '{{ prometheus_rule_files | default({}) }}' + notify: + - reload prometheus + +- name: copy prometheus main config file from role's default, if necessary + template: + src: "prometheus.yml.j2" + dest: "{{ prometheus_config_path }}/prometheus.yml" + validate: "{{ 
prometheus_daemon_dir }}/promtool check config %s" + when: prometheus_conf_main is not defined + notify: + - reload prometheus + +- name: copy prometheus main config file from playbook's, if any + template: + src: "{{ playbook_dir }}/{{ prometheus_conf_main }}" + dest: "{{ prometheus_config_path }}/prometheus.yml" + validate: "{{ prometheus_daemon_dir }}/promtool check config %s" + when: prometheus_conf_main is defined + notify: + - reload prometheus + +- name: enable prometheus service + service: + name: prometheus + enabled: yes + state: started diff --git a/roles/service-prometheus/templates/alertmanager.service.j2 b/roles/service-prometheus/templates/alertmanager.service.j2 new file mode 100644 index 0000000..891e35f --- /dev/null +++ b/roles/service-prometheus/templates/alertmanager.service.j2 @@ -0,0 +1,18 @@ +[Unit] +Description=Prometheus alertmanager. +After=network.target + +[Service] +Type=simple + +PIDFile={{ prometheus_pid_path }}/alertmanager.pid + +User={{ prometheus_user }} +Group={{ prometheus_group }} + +{% if alertmanager_opts is defined %} +ExecStart={{ alertmanager_daemon_dir }}/alertmanager {{ alertmanager_default_opts }} {{ alertmanager_opts }} +{% else %} +ExecStart={{ alertmanager_daemon_dir }}/alertmanager {{ alertmanager_default_opts }} +{% endif %} +ExecReload=/bin/kill -HUP $MAINPID diff --git a/roles/service-prometheus/templates/node_exporter.service.j2 b/roles/service-prometheus/templates/node_exporter.service.j2 new file mode 100644 index 0000000..abb2007 --- /dev/null +++ b/roles/service-prometheus/templates/node_exporter.service.j2 @@ -0,0 +1,18 @@ +[Unit] +Description=node_exporter - Prometheus exporter for machine metrics. 
+After=network.target + +[Service] +Type=simple + +PIDFile={{ prometheus_pid_path }}/node_exporter.pid + +User={{ prometheus_user }} +Group={{ prometheus_group }} + +{% if node_exporter_opts is defined %} +ExecStart={{ node_exporter_daemon_dir }}/node_exporter {{ node_exporter_opts }} +{% else %} +ExecStart={{ node_exporter_daemon_dir }}/node_exporter +{% endif %} +ExecReload=/bin/kill -HUP $MAINPID diff --git a/roles/service-prometheus/templates/prometheus.service.j2 b/roles/service-prometheus/templates/prometheus.service.j2 new file mode 100644 index 0000000..f6773cb --- /dev/null +++ b/roles/service-prometheus/templates/prometheus.service.j2 @@ -0,0 +1,18 @@ +[Unit] +Description=Prometheus server daemon. +After=network.target + +[Service] +Type=simple + +PIDFile={{ prometheus_pid_path }}/prometheus.pid + +User={{ prometheus_user }} +Group={{ prometheus_group }} + +{% if prometheus_opts is defined %} +ExecStart={{ prometheus_daemon_dir }}/prometheus {{ prometheus_default_opts }} {{ prometheus_opts }} +{% else %} +ExecStart={{ prometheus_daemon_dir }}/prometheus {{ prometheus_default_opts }} +{% endif %} +ExecReload=/bin/kill -HUP $MAINPID diff --git a/roles/service-prometheus/templates/prometheus.yml.j2 b/roles/service-prometheus/templates/prometheus.yml.j2 new file mode 100644 index 0000000..93a3eb0 --- /dev/null +++ b/roles/service-prometheus/templates/prometheus.yml.j2 @@ -0,0 +1,36 @@ +global: + scrape_interval: 15s # By default, scrape targets every 15 seconds. + evaluation_interval: 15s # By default, evaluate rules every 15 seconds. + # scrape_timeout is set to the global default (10s). + + # The labels to add to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). + external_labels: + monitor: 'master' + +{% if prometheus_rule_files is defined %} +# rule_files specifies a list of files from which rules are read. 
+rule_files: + {% for (key, value) in prometheus_rule_files.items() %} + - {{ prometheus_rule_path }}/{{ value.dest }} + {% endfor %} +{% endif %} + +# A list of scrape configurations. +scrape_configs: + + - job_name: 'prometheus' + scrape_interval: 10s + scrape_timeout: 10s + static_configs: + - targets: ['localhost:9090'] + + - job_name: "node" + file_sd_configs: + - files: + - '{{ prometheus_file_sd_config_path }}/*.json' + - '{{ prometheus_file_sd_config_path }}/*.yml' + - '{{ prometheus_file_sd_config_path }}/*.yaml' + #static_configs: + #- targets: + # - "localhost:9100"