Add prometheus role (#9)

This commit is contained in:
n0trax 2017-12-05 05:58:34 +01:00 committed by kokel
parent 0f9cee0e7d
commit ad5b658467
14 changed files with 655 additions and 0 deletions

View file

@ -0,0 +1,22 @@
The MIT License (MIT)
Copyright (c) 2015 William Yeh <william.pjyeh@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -0,0 +1,239 @@
FFMWU prometheus
============
## Summary
Prometheus ansible role based on **[williamyeh.prometheus](https://galaxy.ansible.com/williamyeh/prometheus/)**
This Ansible role has the following features for [Prometheus](http://prometheus.io/):
- Install specific versions of [Prometheus server](https://github.com/prometheus/prometheus), [Node exporter](https://github.com/prometheus/node_exporter), [Alertmanager](https://github.com/prometheus/alertmanager).
- Handlers for reload events (see the **Handlers** section below);
- Bare-bones configuration (*real* configuration should be left to the user's template files; see the **Usage** section below).
## Role Variables
### Mandatory variables
The components to be installed:
```yaml
# Supported components:
#
# [Server components]
# - "prometheus"
# - "alertmanager"
#
# [Exporter components]
# - "node_exporter"
#
prometheus_components
```
### Optional variables: general settings
User-configurable defaults:
```yaml
# user and group
prometheus_user: prometheus
prometheus_group: prometheus
# directory for executable files
prometheus_install_path: /opt/prometheus
# directory for configuration files
prometheus_config_path: /etc/prometheus
# directory for PID files
prometheus_pid_path: /var/run/prometheus
# directory for temporary files
prometheus_download_path: /tmp
# version of helper utility "gosu"
gosu_version: "1.10"
```
### Optional variables: Prometheus server
User-configurable defaults:
```yaml
# which version?
prometheus_version: 2.0.0
# directory for rule files
prometheus_rule_path: "{{ prometheus_config_path }}/rules"
# directory for file_sd files
prometheus_file_sd_config_path: "{{ prometheus_config_path }}/tgroups"
# directory for runtime database
prometheus_db_path: /var/lib/prometheus
```
User-installable configuration file (see [doc](http://prometheus.io/docs/operating/configuration/) for details):
```yaml
# main conf template relative to `playbook_dir`;
# to be installed to "{{ prometheus_config_path }}/prometheus.yml"
prometheus_conf_main
```
User-installable rule files (see [doc](http://prometheus.io/docs/alerting/rules/) for details):
```yaml
# rule files to be installed to "{{ prometheus_rule_path }}" directory;
# dict fields:
# - key: memo for this rule
# - value:
# - src: file relative to `playbook_dir`
# - dest: target file relative to `{{ prometheus_rule_path }}`
prometheus_rule_files
```
Additional command-line arguments, if any (use `prometheus --help` to see the full list of arguments):
```yaml
prometheus_opts
```
### Optional variables: Node exporter
User-configurable defaults:
```yaml
# which version?
node_exporter_version: 0.15.1
```
Additional command-line arguments, if any (use `node_exporter --help` to see the full list of arguments):
```yaml
node_exporter_opts
```
### Optional variables: Alertmanager
User-configurable defaults:
```yaml
# which version?
alertmanager_version: 0.10.0
# directory for runtime database (currently for `silences.json`)
alertmanager_db_path: /var/lib/alertmanager
```
User-installable alertmanager conf file (see [doc](http://prometheus.io/docs/alerting/alertmanager/) for details):
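The role installs its bundled `files/alertmanager.yml` to `{{ prometheus_config_path }}/alertmanager.yml`; edit that file to change the Alertmanager configuration.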
Additional command-line arguments, if any (use `alertmanager --help` to see the full list of arguments):
```yaml
alertmanager_opts
```
## Handlers
Prometheus server:
- `reload prometheus`
Alertmanager:
- `reload alertmanager`
## Usage
### Step 1: add role
Add role name `service-prometheus` to your playbook file.
### Step 2: add variables
Set vars in your playbook file, if necessary.
Simple example:
```yaml
---
# file: simple-playbook.yml
- hosts: all
  become: True
  roles:
    - service-prometheus
  vars:
    prometheus_components: [ "prometheus", "alertmanager" ]
```
### Step 3: copy user's config files, if necessary
More practical example:
```yaml
---
# file: complex-playbook.yml
- hosts: all
  become: True
  roles:
    - service-prometheus
  vars:
    prometheus_components:
      - prometheus
      - node_exporter
      - alertmanager
    prometheus_rule_files:
      this_is_rule_1_InstanceDown:
        src: some/path/basic.rules
        dest: basic.rules
```
### Step 4: browse the default Prometheus pages
Open the page in your browser:
- Prometheus - `http://HOST:9090` or `http://HOST:9090/consoles/node.html`
- Alertmanager - `http://HOST:9093`
## License
MIT License. See the [LICENSE file](LICENSE) for details.

View file

@ -0,0 +1,32 @@
---
# Role defaults for the Prometheus stack. Every value below may be overridden
# from the playbook or the inventory.

# Components to install. The task files test for "prometheus",
# "node_exporter" and "alertmanager".
prometheus_components:
  - 'node_exporter'

# System account that owns the installed files and is used as User=/Group=
# in the systemd units.
prometheus_user: 'prometheus'
prometheus_group: 'prometheus'

# Release versions to download (quoted so they are always read as strings).
prometheus_version: '2.0.0'
node_exporter_version: '0.15.1'
alertmanager_version: '0.10.0'
gosu_version: '1.10'

# Filesystem layout: binaries, configuration, runtime data and PID files.
prometheus_install_path: '/opt/prometheus'
prometheus_config_path: '/etc/prometheus'
prometheus_rule_path: '{{ prometheus_config_path }}/rules'
prometheus_file_sd_config_path: '{{ prometheus_config_path }}/tgroups'
prometheus_db_path: '/var/lib/prometheus'
alertmanager_db_path: '/var/lib/alertmanager'
prometheus_pid_path: '/var/run/prometheus'

# Scratch locations derived from the download directory.
# NOTE(review): workdir/goroot/gopath are not referenced by the tasks visible
# alongside this file - confirm they are still needed.
prometheus_download_path: '/tmp'
prometheus_workdir: '{{ prometheus_download_path }}/prometheus_workdir'
prometheus_goroot: '{{ prometheus_workdir }}/go'
prometheus_gopath: '{{ prometheus_workdir }}/gopath'

# Command-line flags always passed to the daemons (folded into one line).
prometheus_default_opts: >-
  --config.file={{ prometheus_config_path }}/prometheus.yml
  --storage.tsdb.path={{ prometheus_db_path }}
alertmanager_default_opts: >-
  -config.file={{ prometheus_config_path }}/alertmanager.yml
  -storage.path={{ alertmanager_db_path }}

View file

@ -0,0 +1,13 @@
# Minimal Alertmanager configuration shipped with the role.

# Intentionally left without a value, exactly as in the shipped file.
global:

# Single top-level route: every alert goes to the "default-pager" receiver.
route:
  group_by:
    - "alertname"
    - "cluster"
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 3h
  receiver: "default-pager"

receivers:
  - name: "default-pager"
    pagerduty_configs:
      # Placeholder value; replace with a real PagerDuty service key.
      - service_key: <team-X-key>

View file

@ -0,0 +1,9 @@
---
# Handlers notified by the task files when a configuration or rule file
# changes. Each one asks the service manager to reload the named service.

- name: reload prometheus
  service:
    state: reloaded
    name: "prometheus"

- name: reload alertmanager
  service:
    state: reloaded
    name: "alertmanager"

View file

@ -0,0 +1,61 @@
---
# Install Prometheus alertmanager.
#
# Downloads the release tarball for `alertmanager_version`, links the binaries
# into /usr/local/bin, installs the systemd unit and the bundled configuration
# file, then enables and starts the service.

# Kept as separate set_fact tasks: the later facts reference
# `alertmanager_signature`, which must already exist when they are templated.
- name: set internal variables, part 1
  set_fact:
    alertmanager_signature: "alertmanager-{{ alertmanager_version }}.linux-amd64"

- name: set internal variables, part 2
  set_fact:
    alertmanager_daemon_dir: "{{ prometheus_install_path }}/{{ alertmanager_signature }}"

- name: set download url
  set_fact:
    alertmanager_tarball_url: "https://github.com/prometheus/alertmanager/releases/download/v{{ alertmanager_version }}/{{ alertmanager_signature }}.tar.gz"

- name: download and uncompress alertmanager tarball
  unarchive:
    src: "{{ alertmanager_tarball_url }}"
    dest: "{{ prometheus_install_path }}"
    # `remote_src` is the supported replacement for the deprecated `copy: no`.
    remote_src: true
    owner: "{{ prometheus_user }}"
    group: "{{ prometheus_group }}"
    mode: "go-w"
    # Skip the download when this version has already been unpacked.
    creates: "{{ alertmanager_daemon_dir }}"

- name: create alertmanager /usr/local/bin links
  file:
    src: "{{ alertmanager_daemon_dir }}/{{ item }}"
    dest: "/usr/local/bin/{{ item }}"
    state: link
  with_items:
    - "alertmanager"
    - "amtool"

- name: mkdir for alertmanager data (silences.json for now)
  file:
    path: "{{ item }}"
    state: directory
    owner: "{{ prometheus_user }}"
    group: "{{ prometheus_group }}"
    mode: "u=rwx,g=rx,o="
  with_items:
    - "{{ alertmanager_db_path }}"

- name: copy alertmanager systemd config
  template:
    src: "alertmanager.service.j2"
    dest: "/lib/systemd/system/alertmanager.service"

- name: install alertmanager config file
  copy:
    src: "alertmanager.yml"
    dest: "{{ prometheus_config_path }}/alertmanager.yml"
  notify:
    - reload alertmanager

# The unit file is managed above, so make systemd re-read it before the
# service is enabled and started; otherwise an updated unit is not picked up.
- name: enable alertmanager service
  systemd:
    name: alertmanager
    daemon_reload: true
    enabled: true
    state: started

View file

@ -0,0 +1,19 @@
---
# Install "gosu" utility.
#
# @see https://github.com/tianon/gosu
#
# Fetches the amd64 release binary for `gosu_version` into /usr/local/bin
# and marks it executable.

- name: set internal variables for convenience
  set_fact:
    gosu_bin_url: 'https://github.com/tianon/gosu/releases/download/{{ gosu_version }}/gosu-amd64'

- name: download gosu executable
  get_url:
    dest: '/usr/local/bin/gosu'
    url: '{{ gosu_bin_url }}'

- name: add executable permission
  file:
    state: file
    path: '/usr/local/bin/gosu'
    mode: 'a+x'

View file

@ -0,0 +1,51 @@
---
# Top-level installer for Prometheus.
#
# Creates the service account and base directories, installs gosu, then
# includes one task file per entry found in `prometheus_components`.

- name: create Prometheus group
  group:
    state: present
    name: '{{ prometheus_group }}'

- name: create Prometheus user
  user:
    state: present
    name: '{{ prometheus_user }}'
    group: '{{ prometheus_group }}'
    comment: 'Prometheus User'
    # Service account: no home directory and no login shell.
    createhome: false
    shell: /sbin/nologin

- name: create base directories
  file:
    state: directory
    path: '{{ item }}'
    owner: '{{ prometheus_user }}'
    group: '{{ prometheus_group }}'
    mode: 'u=rwx,g=rx,o='
  with_items:
    - '{{ prometheus_install_path }}'
    - '{{ prometheus_config_path }}'
    - '{{ prometheus_pid_path }}'

- name: install helper utility "gosu"
  include_tasks: install-gosu.yml

# Each component is installed only when it is listed in the variable.
- name: install and configure prometheus service
  include_tasks: prometheus.yml
  when: "'prometheus' in prometheus_components"

- name: install and configure node-exporter service
  include_tasks: node-exporter.yml
  when: "'node_exporter' in prometheus_components"

- name: install and configure alertmanager service
  include_tasks: alertmanager.yml
  when: "'alertmanager' in prometheus_components"

# Runs last so that everything unpacked by the includes above is covered.
- name: set {{ prometheus_install_path }} permissions, owner and group
  file:
    state: directory
    path: '{{ prometheus_install_path }}'
    recurse: true
    owner: '{{ prometheus_user }}'
    group: '{{ prometheus_group }}'
    mode: 'go-w'

View file

@ -0,0 +1,40 @@
---
# Install Prometheus node-exporter.
#
# @see http://prometheus.io/docs/introduction/getting_started/
#
# Downloads the release tarball for `node_exporter_version`, links the binary
# into /usr/local/bin, installs the systemd unit, then enables and starts the
# service.

- name: set internal variables for convenience
  set_fact:
    node_exporter_daemon_dir: "{{ prometheus_install_path }}/node_exporter-{{ node_exporter_version }}.linux-amd64"
    node_exporter_tarball_url: "https://github.com/prometheus/node_exporter/releases/download/v{{ node_exporter_version }}/node_exporter-{{ node_exporter_version }}.linux-amd64.tar.gz"

- name: download and untar node_exporter tarball
  unarchive:
    src: "{{ node_exporter_tarball_url }}"
    dest: "{{ prometheus_install_path }}"
    # `remote_src` is the supported replacement for the deprecated `copy: no`.
    remote_src: true
    # Skip the download when this version has already been unpacked.
    creates: "{{ node_exporter_daemon_dir }}"

- name: create node_exporter /usr/local/bin link
  file:
    src: "{{ node_exporter_daemon_dir }}/node_exporter"
    dest: "/usr/local/bin/node_exporter"
    state: link

- name: mkdir for data
  file:
    path: "{{ prometheus_db_path }}"
    state: directory
    owner: "{{ prometheus_user }}"
    group: "{{ prometheus_group }}"
    mode: "u=rwx,g=rx,o="

- name: copy systemd config to server
  template:
    # Resolved from the role's templates directory, matching how the other
    # task files reference their unit templates.
    src: "node_exporter.service.j2"
    dest: "/lib/systemd/system/node_exporter.service"

# The unit file is managed above, so make systemd re-read it before the
# service is enabled and started; otherwise an updated unit is not picked up.
- name: enable node_exporter service
  systemd:
    name: node_exporter
    daemon_reload: true
    enabled: true
    state: started

View file

@ -0,0 +1,79 @@
---
# Install Prometheus server.
#
# @see http://prometheus.io/docs/introduction/getting_started/
#
# Downloads the release tarball for `prometheus_version`, links the binaries
# into /usr/local/bin, installs the systemd unit, rule files and main
# configuration (each validated with promtool), then enables and starts the
# service.

- name: set internal variables for convenience
  set_fact:
    prometheus_daemon_dir: "{{ prometheus_install_path }}/prometheus-{{ prometheus_version }}.linux-amd64"
    prometheus_tarball_url: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-amd64.tar.gz"

# Appends the console flags to the existing value; it needs
# `prometheus_daemon_dir`, so it must run after the task above.
- name: set prometheus default options
  set_fact:
    prometheus_default_opts: "{{ prometheus_default_opts }} --web.console.templates={{ prometheus_daemon_dir }}/consoles --web.console.libraries={{ prometheus_daemon_dir }}/console_libraries"

- name: download and untar prometheus tarball
  unarchive:
    src: "{{ prometheus_tarball_url }}"
    dest: "{{ prometheus_install_path }}"
    # `remote_src` is the supported replacement for the deprecated `copy: no`.
    remote_src: true
    # Skip the download when this version has already been unpacked.
    creates: "{{ prometheus_daemon_dir }}"

- name: create prometheus /usr/local/bin links
  file:
    src: "{{ prometheus_daemon_dir }}/{{ item }}"
    dest: "/usr/local/bin/{{ item }}"
    state: link
  with_items:
    - "prometheus"
    - "promtool"

- name: mkdir for config and data
  file:
    path: "{{ item }}"
    state: directory
    owner: "{{ prometheus_user }}"
    group: "{{ prometheus_group }}"
    mode: "u=rwx,g=rx,o="
  with_items:
    - "{{ prometheus_rule_path }}"
    - "{{ prometheus_file_sd_config_path }}"
    - "{{ prometheus_db_path }}"

- name: copy prometheus systemd config
  template:
    src: "prometheus.service.j2"
    dest: "/lib/systemd/system/prometheus.service"

# Loops over `prometheus_rule_files` (empty dict when undefined); every file
# is checked by promtool before it replaces the destination.
- name: copy rule files from playbook's, if any
  copy:
    src: "{{ playbook_dir }}/{{ item.value.src }}"
    dest: "{{ prometheus_rule_path }}/{{ item.value.dest }}"
    validate: "{{ prometheus_daemon_dir }}/promtool check rules %s"
  with_dict: "{{ prometheus_rule_files | default({}) }}"
  notify:
    - reload prometheus

# Exactly one of the next two tasks runs, depending on whether the playbook
# supplies its own main configuration template via `prometheus_conf_main`.
- name: copy prometheus main config file from role's default, if necessary
  template:
    src: "prometheus.yml.j2"
    dest: "{{ prometheus_config_path }}/prometheus.yml"
    validate: "{{ prometheus_daemon_dir }}/promtool check config %s"
  when: prometheus_conf_main is not defined
  notify:
    - reload prometheus

- name: copy prometheus main config file from playbook's, if any
  template:
    src: "{{ playbook_dir }}/{{ prometheus_conf_main }}"
    dest: "{{ prometheus_config_path }}/prometheus.yml"
    validate: "{{ prometheus_daemon_dir }}/promtool check config %s"
  when: prometheus_conf_main is defined
  notify:
    - reload prometheus

# The unit file is managed above, so make systemd re-read it before the
# service is enabled and started; otherwise an updated unit is not picked up.
- name: enable prometheus service
  systemd:
    name: prometheus
    daemon_reload: true
    enabled: true
    state: started

View file

@ -0,0 +1,18 @@
# systemd unit for the Prometheus Alertmanager (rendered by Ansible).
# Extra command-line flags are appended when `alertmanager_opts` is set.
[Unit]
Description=Prometheus alertmanager.
After=network.target

[Service]
Type=simple
PIDFile={{ prometheus_pid_path }}/alertmanager.pid
User={{ prometheus_user }}
Group={{ prometheus_group }}
{# Guard on the same variable that is rendered below. #}
{% if alertmanager_opts is defined %}
ExecStart={{ alertmanager_daemon_dir }}/alertmanager {{ alertmanager_default_opts }} {{ alertmanager_opts }}
{% else %}
ExecStart={{ alertmanager_daemon_dir }}/alertmanager {{ alertmanager_default_opts }}
{% endif %}
ExecReload=/bin/kill -HUP $MAINPID

# Needed so that enabling the unit actually hooks it into the boot sequence.
[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,18 @@
# systemd unit for the Prometheus node_exporter (rendered by Ansible).
# Extra command-line flags are appended when `node_exporter_opts` is set.
[Unit]
Description=node_exporter - Prometheus exporter for machine metrics.
After=network.target

[Service]
Type=simple
PIDFile={{ prometheus_pid_path }}/node_exporter.pid
User={{ prometheus_user }}
Group={{ prometheus_group }}
{% if node_exporter_opts is defined %}
ExecStart={{ node_exporter_daemon_dir }}/node_exporter {{ node_exporter_opts }}
{% else %}
ExecStart={{ node_exporter_daemon_dir }}/node_exporter
{% endif %}
ExecReload=/bin/kill -HUP $MAINPID

# Needed so that enabling the unit actually hooks it into the boot sequence.
[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,18 @@
# systemd unit for the Prometheus server (rendered by Ansible).
# Extra command-line flags are appended when `prometheus_opts` is set.
[Unit]
Description=Prometheus server daemon.
After=network.target

[Service]
Type=simple
PIDFile={{ prometheus_pid_path }}/prometheus.pid
User={{ prometheus_user }}
Group={{ prometheus_group }}
{# Render the same variable the guard tests. #}
{% if prometheus_opts is defined %}
ExecStart={{ prometheus_daemon_dir }}/prometheus {{ prometheus_default_opts }} {{ prometheus_opts }}
{% else %}
ExecStart={{ prometheus_daemon_dir }}/prometheus {{ prometheus_default_opts }}
{% endif %}
ExecReload=/bin/kill -HUP $MAINPID

# Needed so that enabling the unit actually hooks it into the boot sequence.
[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,36 @@
# Default Prometheus server configuration, rendered by Ansible when the
# playbook does not supply its own template through `prometheus_conf_main`.
global:
  scrape_interval: 15s  # By default, scrape targets every 15 seconds.
  evaluation_interval: 15s  # By default, evaluate rules every 15 seconds.
  # scrape_timeout is set to the global default (10s).

  # The labels to add to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'master'

{# Emit rule_files only when at least one rule file is configured. #}
{# .values() works on both Python 2 and Python 3 (iteritems() is Python 2 only). #}
{% if prometheus_rule_files is defined and prometheus_rule_files %}
# Rule files specifies a list of files from which rules are read.
rule_files:
{% for rule in prometheus_rule_files.values() %}
  - '{{ prometheus_rule_path }}/{{ rule.dest }}'
{% endfor %}
{% endif %}

# A list of scrape configurations.
scrape_configs:
  # Prometheus scrapes its own metrics endpoint.
  - job_name: 'prometheus'
    scrape_interval: 10s
    scrape_timeout: 10s
    static_configs:
      - targets: ['localhost:9090']

  # Targets for this job are read from the files in the file_sd directory.
  - job_name: "node"
    file_sd_configs:
      - files:
          - '{{ prometheus_file_sd_config_path }}/*.json'
          - '{{ prometheus_file_sd_config_path }}/*.yml'
          - '{{ prometheus_file_sd_config_path }}/*.yaml'
    #static_configs:
    #- targets:
    #  - "localhost:9100"