Commit 44d5c88e authored by Julien Gomes Dias's avatar Julien Gomes Dias
Browse files

Merge branch 'prometheus' into 'master'

[add] Prometheus and Grafana role for monitoring.

See merge request !14
parents 3e68a3fa 9a836605
---
# Port the nginx reverse proxy forwards to (used as the proxy_pass target port).
app_main_port: "3000"
# Group that owns the instance directories; defaults to the application user's name.
app_group: "{{ app_user }}"
\ No newline at end of file
---
# Handlers for the Grafana role.
# Reloads nginx so a changed or newly enabled site configuration is picked up.
# Triggered by tasks that `notify: reload nginx grafana`.
- name: reload nginx grafana
  service:
    name: nginx
    state: reloaded
---
# Installation tasks for the Grafana instance: certificate, system user,
# directories, configuration templates, docker-compose start-up, log rotation,
# nginx site activation and optional monit integration.

# Only request a certificate when a non-empty domain is configured.
- name: Create of update let'encrypt certificate
  import_role:
    name: _letsencrypt_certificate
  when: app_domain is defined and app_domain != ""

# Delegates user creation to the _user role, passing name and password.
- name: create user {{ app_user }}
  import_role:
    name: _user
  vars:
    user_name: "{{ app_user }}"
    user_password: "{{ app_user_password }}"
  when: app_user is defined

# Log directory lives next to the instance root (one level up).
- name: "directory for www logs mounted in jail"
  file:
    state: directory
    path: "{{ app_instance_root }}/../logs"
    # Quoted so the mode is always passed as an octal string, never re-typed by the YAML parser.
    mode: "0711"
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
  when: app_user is defined

# Data directory; the docker-compose template bind-mounts it into the container.
- name: "Create data folder"
  file:
    state: directory
    path: "{{ app_instance_root }}/data"
    mode: "0775"
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
  when: app_user is defined

- name: "template of grafana.ini {{ app_instance_id }}"
  template:
    src: "grafana_ini.j2"
    dest: "{{ app_instance_root }}/data/grafana.ini"

# The site file is written to sites-available; it is enabled by a later task.
- name: "template nginx_app.j2 {{ app_instance_id }}"
  template:
    src: "nginx_app.j2"
    dest: "/etc/nginx/sites-available/{{ app_instance_id }}.conf"
  notify: reload nginx grafana
  tags:
    - rev_proxy

- name: "copy docker-compose {{ app_instance_id }}"
  template:
    src: "docker_compose_yml.j2"
    dest: "{{ app_instance_root }}/docker-compose.yml"
  tags:
    - grafana_installation

# Brings up the compose project found in the instance root.
- name: "start grafana environment"
  docker_compose:
    project_src: "{{ app_instance_root }}"
    state: present
  tags:
    - grafana_installation

- name: log rotate
  import_role:
    name: _app_logrotate

# Symlink sites-enabled -> sites-available, then reload nginx.
- name: "enable site for {{ app_domain }}"
  file:
    state: link
    path: "/etc/nginx/sites-enabled/{{ app_instance_id }}.conf"
    src: "/etc/nginx/sites-available/{{ app_instance_id }}.conf"
  notify: reload nginx grafana

# Monit integration is optional: only when a non-empty monit_request is set.
- name: Add monit
  import_role:
    name: _app_monit
  when: monit_request is defined and monit_request != ''
---
# Entry point of the role: derives the path facts used by the other task
# files, then dispatches to install.yml or uninstall.yml based on app_run.

- name: "set user home var "
  set_fact:
    app_user_home: "/home/{{ app_user }}"
  tags:
    - setpath

# Instance root is a sub-directory of the user's home named after the instance id.
- name: "set instance root"
  set_fact:
    app_instance_root: "{{ app_user_home }}/{{ app_instance_id }}"
  tags:
    - setpath

# Previously also named "set instance root"; renamed so the play output
# reflects what the task actually sets.
- name: "set run user"
  set_fact:
    run_user: "{{ app_user }}"
  tags:
    - setpath

- import_tasks: install.yml
  when: app_run in ['install', 'reinstall']

- import_tasks: uninstall.yml
  when: app_run == 'uninstall'
\ No newline at end of file
# docker-compose project for the Grafana instance (Jinja template rendered by Ansible).
version: '3.7'
services:
  grafana:
    image: grafana/grafana-oss
    user: "472:0"
    environment:
      - GF_PATHS_DATA=/var/lib/grafana
    volumes:
      # Host data directory created by the role, mounted as Grafana's data path.
      - "{{ app_instance_root }}/data:/var/lib/grafana"
    ports:
      # Published on loopback only; nginx is the public entry point.
      # The fallback port comes from app_main_port so it always matches the
      # port nginx proxies to; APP_MAIN_PORT in the environment still overrides it.
      - "127.0.0.1:${APP_MAIN_PORT:-{{ app_main_port }}}:3000"
    restart: always
    # NOTE(review): this command chowns system paths and /home/storage/ while the
    # container runs as user 472:0, and /home/storage/ is not mounted here —
    # confirm it is still needed and works with this image.
    command: sh -c "/usr/local/bin/init.sh && chown -R root:root /etc/grafana && chmod -R a+r /etc/grafana && chown -R grafana:grafana /var/lib/grafana && chown -R grafana:grafana /usr/share/grafana && chown grafana:grafana /home/storage/ && chmod 777 /home/storage/"
This diff is collapsed.
# Map the client's User-Agent to $log_ua: 0 when it matches "Monit", 1 otherwise.
# $log_ua is used as the `if=` condition of access_log in the HTTPS server below,
# so requests whose User-Agent matches are not written to the access log.
map $http_user_agent $log_ua {
~Monit 0;
default 1;
}
# Plain-HTTP virtual host: listens on port 80 (IPv4 and IPv6) and only redirects to HTTPS.
server {
listen 80;
listen [::]:80;
# The `mandatory` filter makes template rendering fail if app_domain is undefined.
server_name {{ app_domain | mandatory }};
# enforce https
return 301 https://$server_name$request_uri;
}
# HTTPS virtual host: terminates TLS with the Let's Encrypt certificate of
# app_domain and reverse-proxies every request to the application port.
server {
    listen 443 ssl http2;
    listen [::]:443 ssl http2;
    server_name {{ app_domain }};
    ssl_certificate /etc/letsencrypt/live/{{ app_domain }}/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/{{ app_domain }}/privkey.pem;
    # Add headers to serve security related headers
    # Before enabling Strict-Transport-Security headers please read into this
    # topic first.
    # add_header Strict-Transport-Security "max-age=15768000;
    # includeSubDomains; preload;";
    #
    # WARNING: Only add the preload option once you read about
    # the consequences in https://hstspreload.org/. This option
    # will add the domain to a hardcoded list that is shipped
    # in all major browsers and getting removed from this list
    # could take several months.
    add_header X-Content-Type-Options nosniff;
    add_header X-XSS-Protection "1; mode=block";
    add_header X-Robots-Tag all; # https://developers.google.com/search/docs/advanced/robots/robots_meta_tag
    add_header X-Download-Options noopen;
    add_header X-Permitted-Cross-Domain-Policies none;
    add_header Strict-Transport-Security "max-age=15768000";
    # Enable gzip but do not remove ETag headers
    gzip on;
    gzip_vary on;
    gzip_comp_level 4;
    gzip_min_length 256;
    gzip_proxied expired no-cache no-store private no_last_modified no_etag auth;
    gzip_types application/atom+xml application/javascript application/json application/ld+json application/manifest+json application/rss+xml application/vnd.geo+json application/vnd.ms-fontobject application/x-font-ttf application/x-web-app-manifest+json application/xhtml+xml application/xml font/opentype image/bmp image/svg+xml image/x-icon text/cache-manifest text/css text/plain text/vcard text/vnd.rim.location.xloc text/vtt text/x-component text/x-cross-domain-policy;
    # Access logging is conditional on $log_ua (see the map at the top of this file).
    access_log {{ www_log }}/{{ app_instance_id }}/access.log combined if=$log_ua;
    error_log {{ www_log }}/{{ app_instance_id }}/error.log;
    # set max upload size
    client_max_body_size 512M;
    fastcgi_buffers 64 4K;
    location / {
        proxy_set_header HOST $host;
        proxy_set_header X-Forwarded-Proto https;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Host $server_name;
        # Use the IPv4 loopback address explicitly: the container port is
        # published on 127.0.0.1 only, whereas "localhost" may also resolve
        # to ::1 and produce refused upstream connections.
        proxy_pass http://127.0.0.1:{{ app_main_port }};
    }
}
---
# vars file for prometheus
---
# ansible-lint configuration: the rule IDs listed here are skipped during linting.
# NOTE(review): presumably 106 = role naming, 204 = line length and
# 208 = file permissions (legacy numeric rule IDs) — confirm against the
# ansible-lint version in use.
skip_list:
- '106'
- '204'
- '208'
*.retry
*.log
.molecule
.cache
__pycache__/
.pytest_cache
.tox
---
# Mergify rules for this repository.
pull_request_rules:
  # Squash-merge pull requests opened by cloudalchemybot from branches whose
  # name matches autoupdate|skeleton, once both listed status checks succeed.
  - name: automatic merge and new release from cloudalchemybot
    conditions:
      - 'status-success=Travis CI - Pull Request'
      - 'status-success=WIP'
      - 'head~=autoupdate|skeleton'
      - 'author=cloudalchemybot'
    actions:
      merge:
        method: squash
        strict: true

  # No conditions: applies to every pull request.
  - name: delete head branch after merge
    conditions: []
    actions:
      delete_head_branch: {}
---
# yamllint configuration: start from the default rule set and adjust below.
extends: default

# Paths excluded from linting (one pattern per line).
ignore: |
  .github/
  meta/

rules:
  # At most one space inside { } and [ ]; violations are errors.
  braces:
    max-spaces-inside: 1
    level: error
  brackets:
    max-spaces-inside: 1
    level: error
  # Line length is not checked.
  line-length: disable
This diff is collapsed.
# Contributor Guideline
This document provides an overview of how you can participate in improving this project or extending it. We are
grateful for all your help: bug reports and fixes, code contributions, documentation or ideas. Feel free to join, we
appreciate your support!!
## Communication
### GitHub repositories
Many of the issues, goals and ideas are tracked in the respective projects on GitHub. Please use this channel to report
bugs, ask questions, and request new features.
## git and GitHub
In order to contribute code please:
1. Fork the project on GitHub
2. Clone the project
3. Add changes (and tests)
4. Commit and push
5. Create a merge-request
To have your code merged, see the expectations listed below.
You can find a well-written guide [here](https://help.github.com/articles/fork-a-repo).
Please follow common commit best-practices. Be explicit, have a short summary, a well-written description and
references. This is especially important for the merge-request.
Some great guidelines can be found [here](https://wiki.openstack.org/wiki/GitCommitMessages) and
[here](http://robots.thoughtbot.com/5-useful-tips-for-a-better-commit-message).
## Releases
We try to stick to semantic versioning and our releases are automated. A release is created by adding a keyword (in a
way similar to the Travis CI keyword [`[ci skip]`](https://docs.travis-ci.com/user/customizing-the-build#Skipping-a-build))
to the commit message of a merge request. Available keywords are (square brackets are important!):
* `[patch]`, `[fix]`, `[bugfix]` - for PATCH version release
* `[minor]`, `[feature]`, `[feat]` - for MINOR version release
* `[major]`, `[breaking change]` - for MAJOR version release
## Changelog
Changelog is generated automatically during release process and all information is taken from github issues, PRs and
labels.
## Expectations
### Keep it simple
We try to provide production ready ansible roles which should be as much zero-conf as possible but this doesn't mean to
overcomplicate things. Just follow [KISS](https://en.wikipedia.org/wiki/KISS_principle).
### Be explicit
* Please avoid using nonsensical property and variable names.
* Use self-describing attribute names for user configuration.
* In case of failures, communicate what happened and why a failure occurs to the user. Make it easy to track the code
or action that produced the error. Try to catch and handle errors if possible to provide improved failure messages.
### Add tests
We are striving to use at least two test scenarios located in [/molecule](molecule) directory. First one
([default](molecule/default)) is testing default configuration without any additional variables, second one
([alternative](molecule/alternative)) is testing what happens when many variables from
[/defaults/main.yml](defaults/main.yml) are changed. When adding new functionalities please add tests to proper
scenarios. Tests are written in testinfra framework and are located in `/tests` subdirectory of scenario directory
(for example default tests are in [/molecule/default/tests](molecule/default/tests)).
More information about:
- [testinfra](http://testinfra.readthedocs.io/en/latest/index.html)
- [molecule](https://molecule.readthedocs.io/en/latest/index.html)
### Follow best practices
Please follow [ansible best practices](http://docs.ansible.com/ansible/latest/playbooks_best_practices.html) and
especially provide meaningful names to tasks and even comments where needed.
Our test framework automatically lints code with [`yamllint`](https://github.com/adrienverge/yamllint),
[`ansible-lint`](https://github.com/willthames/ansible-lint), and [`flake8`](https://gitlab.com/pycqa/flake8) programs
so be sure to follow their rules.
Remember: Code is generally read much more often than written.
### Use Markdown
Wherever possible, please refrain from any other formats and stick to simple markdown.
## Requirements regarding roles design
We are trying to create the best and most secure installation method for non-containerized prometheus stack components.
To accomplish this all roles need to support:
- current and at least one previous ansible version
- systemd as the only available process manager
- at least latest debian and CentOS distributions
The MIT License (MIT)
Copyright (c) 2017-2018 Pawel Krupa and Roman Demachkovych
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
############# Repris du rôle de Cloud Alchemy https://github.com/cloudalchemy/ansible-node-exporter
<p><img src="https://www.circonus.com/wp-content/uploads/2015/03/sol-icon-itOps.png" alt="graph logo" title="graph" align="right" height="60" /></p>
# Ansible Role: node exporter
[![Build Status](https://travis-ci.org/cloudalchemy/ansible-node-exporter.svg?branch=master)](https://travis-ci.org/cloudalchemy/ansible-node-exporter)
[![License](https://img.shields.io/badge/license-MIT%20License-brightgreen.svg)](https://opensource.org/licenses/MIT)
[![Ansible Role](https://img.shields.io/badge/ansible%20role-cloudalchemy.node_exporter-blue.svg)](https://galaxy.ansible.com/cloudalchemy/node_exporter/)
[![GitHub tag](https://img.shields.io/github/tag/cloudalchemy/ansible-node-exporter.svg)](https://github.com/cloudalchemy/ansible-node-exporter/tags)
## Warning
Due to limitations of galaxy.ansible.com we had to move the role to https://galaxy.ansible.com/cloudalchemy/node_exporter and use `_` instead of `-` in role name. This is a breaking change and unfortunately, it affects all versions of node_exporter role as ansible galaxy doesn't offer any form of redirection. We are sorry for the inconvenience.
## Description
Deploy prometheus [node exporter](https://github.com/prometheus/node_exporter) using ansible.
## Requirements
- Ansible >= 2.7 (It might work on previous versions, but we cannot guarantee it)
- gnu-tar on Mac deployer host (`brew install gnu-tar`)
- Passlib is required when using the basic authentication feature (`pip install passlib[bcrypt]`)
## Role Variables
All variables which can be overridden are stored in [defaults/main.yml](defaults/main.yml) and are listed in the table below.
| Name | Default Value | Description |
| -------------- | ------------- | -----------------------------------|
| `node_exporter_version` | 1.3.1 | Node exporter package version. Also accepts latest as parameter. |
| `node_exporter_binary_local_dir` | "" | Enables the use of local packages instead of those distributed on github. The parameter may be set to a directory where the `node_exporter` binary is stored on the host where ansible is run. This overrides the `node_exporter_version` parameter |
| `app_main_port` | "9100" | Port on which node exporter will listen |
| `node_exporter_web_telemetry_path` | "/metrics" | Path under which to expose metrics |
| `node_exporter_enabled_collectors` | ```["systemd",{textfile: {directory: "{{node_exporter_textfile_dir}}"}}]``` | List of dicts defining additionally enabled collectors and their configuration. It adds collectors to [those enabled by default](https://github.com/prometheus/node_exporter#enabled-by-default). |
| `node_exporter_disabled_collectors` | [] | List of disabled collectors. By default node_exporter disables collectors listed [here](https://github.com/prometheus/node_exporter#disabled-by-default). |
| `node_exporter_textfile_dir` | "/var/lib/node_exporter" | Directory used by the [Textfile Collector](https://github.com/prometheus/node_exporter#textfile-collector). To get permissions to write metrics in this directory, users must be in `node-exp` system group. __Note__: More information in TROUBLESHOOTING.md guide. |
| `node_exporter_tls_server_config` | {} | Configuration for TLS authentication. Keys and values are the same as in [node_exporter docs](https://github.com/prometheus/node_exporter/blob/master/https/README.md#sample-config). |
| `node_exporter_http_server_config` | {} | Config for HTTP/2 support. Keys and values are the same as in [node_exporter docs](https://github.com/prometheus/node_exporter/blob/master/https/README.md#sample-config). |
| `node_exporter_basic_auth_users` | {} | Dictionary of users and password for basic authentication. Passwords are automatically hashed with bcrypt. |
## Example
### Playbook
Use it in a playbook as follows:
```yaml
- hosts: all
roles:
- cloudalchemy.node_exporter
```
### TLS config
Before running node_exporter role, the user needs to provision their own certificate and key.
```yaml
- hosts: all
pre_tasks:
- name: Create node_exporter cert dir
file:
path: "/etc/node_exporter"
state: directory
owner: root
group: root
- name: Create cert and key
openssl_certificate:
path: /etc/node_exporter/tls.cert
csr_path: /etc/node_exporter/tls.csr
privatekey_path: /etc/node_exporter/tls.key
provider: selfsigned
roles:
- cloudalchemy.node_exporter
vars:
node_exporter_tls_server_config:
cert_file: /etc/node_exporter/tls.cert
key_file: /etc/node_exporter/tls.key
node_exporter_basic_auth_users:
randomuser: examplepassword
```
### Demo site
We provide an example site that demonstrates a full monitoring solution based on prometheus and grafana. The repository with code and links to running instances is [available on github](https://github.com/cloudalchemy/demo-site) and the site is hosted on [DigitalOcean](https://digitalocean.com).
## Local Testing
The preferred way of locally testing the role is to use Docker and [molecule](https://github.com/ansible-community/molecule) (v3.x). You will have to install Docker on your system. See "Get started" for a Docker package suitable for your system. Running your tests is as simple as executing `molecule test`.
## Continuous Integration
Combining molecule and circle CI allows us to test how new PRs will behave when used with multiple ansible versions and multiple operating systems. This also allows us to create test scenarios for different role configurations. As a result we have quite a large test matrix which can take more time than local testing, so please be patient.
## Contributing
See [contributor guideline](CONTRIBUTING.md).
## Troubleshooting
See [troubleshooting](TROUBLESHOOTING.md).
## License
This project is licensed under MIT License. See [LICENSE](/LICENSE) for more details.
# Troubleshooting
## Bad requests (HTTP 400)
This role downloads checksums from the Github project to verify the integrity of artifacts installed on your servers. When downloading the checksums, a "bad request" error might occur.
This happens in environments which (knowingly or unknowingly) use the [netrc mechanism](https://www.gnu.org/software/inetutils/manual/html_node/The-_002enetrc-file.html) to auto-login into servers.
Unless netrc is needed by your playbook and ansible roles, please unset the var like so:
```
$ NETRC= ansible-playbook ...
```
Or:
```
$ export NETRC=
$ ansible-playbook ...
```
## node_exporter doesn't report data from textfile collector
There are 3 potential issues why node_exporter doesn't pick up data:
1. Duplicated metrics across multiple files.
2. File is not readable by node_exporter process.
3. Textfile collector is not enabled.
Solving the first possibility is out of scope of the role as the data is created somewhere else. When creating that data, ensure
the files are readable by the `node-exp` user. To get access to the directory with the files, your process needs to be in the `node-exp`
group.
Lastly ansible role misconfiguration can also lead to data not being picked up. Check if `node_exporter` textfile
collector is enabled in `node_exporter_enabled_collectors` as follows:
```yaml
node_exporter_enabled_collectors:
- textfile:
directory: "{{ node_exporter_textfile_dir }}"
```
__Note__: the `node_exporter_textfile_dir` variable is only responsible for creating a directory, not for enabling a collector.
---
# User-overridable defaults for the node_exporter role.
node_exporter_version: 1.3.1
# When non-empty, a local directory holding the node_exporter binary is used
# instead of downloading a release.
node_exporter_binary_local_dir: ""
node_exporter_web_telemetry_path: "/metrics"
# Quoted to match the documented default ("9100") and the string form used
# for the same variable in the Grafana role.
app_main_port: "9100"

node_exporter_textfile_dir: "/var/lib/node_exporter"

node_exporter_tls_server_config: {}

node_exporter_http_server_config: {}

node_exporter_basic_auth_users: {}

# Collectors enabled in addition to node_exporter's defaults.
node_exporter_enabled_collectors:
  - systemd
  - textfile:
      directory: "{{ node_exporter_textfile_dir }}"
#  - filesystem:
#      ignored-mount-points: "^/(sys|proc|dev)($|/)"
#      ignored-fs-types: "^(sys|proc|auto)fs$"

node_exporter_disabled_collectors: []

# Internal variables.
_node_exporter_binary_install_dir: "/usr/local/bin"
_node_exporter_system_group: "node-exp"
_node_exporter_system_user: "{{ _node_exporter_system_group }}"
# NOTE(review): presumably the checksum of the release artifact for
# node_exporter_version — confirm, and update it together with the version.
node_exporter_checksum: "68f3802c2dd3980667e4ba65ea2e1fb03f4a4ba026cca375f15a0390ff850949"
---
# Handlers for the node_exporter role.

# Reloads systemd unit definitions and restarts the node_exporter service.
- name: restart node_exporter
  become: true
  systemd:
    daemon_reload: true
    name: node_exporter
    state: restarted

# Reloads nginx so a changed site configuration is picked up.
- name: reload nginx node_exporter
  service:
    name: nginx
    state: reloaded
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment