Commit 56db03dd authored by HgO
Browse files

create alert when prometheus is down

parent 33ddb9cb
Pipeline #804 passed with stage
in 3 minutes and 14 seconds
......@@ -4,3 +4,5 @@ alertmanager_matrix_username: "neutribot"
alertmanager_matrix_password: "{{ vault_alertmanager_matrix_password }}"
alertmanager_matrix_host: matrix.domainepublic.net
alertmanager_matrix_room: "!RwsRwTYSutAOiujkCC:matrix.domainepublic.net"
# Hosts for which the rules template generates a "Prometheus Down" absent()
# check; the host rendering the template is skipped. Here: the members of the
# `prometheus` inventory group.
alertmanager_prometheus_remote_hosts: "{{ groups.prometheus }}"
......@@ -12,3 +12,5 @@ alertmanager_matrix_password: !vault |
alertmanager_matrix_host: matrix.domainepublic.net
alertmanager_matrix_room: "!wjECRmYYmDbBJcebBt:matrix.domainepublic.net"
alertmanager_matrix_service: alertmanager_service
# Hosts for which the rules template generates a "Prometheus Down" absent()
# check; the host rendering the template is skipped.
# NOTE(review): groups.all lists every inventory host, not only Prometheus
# servers - confirm this is intended.
alertmanager_prometheus_remote_hosts: "{{ groups.all }}"
......@@ -7,8 +7,8 @@
state: directory
- name: Copie des alertes de base
copy:
src: alertmanager/base.rules
template:
src: rules/base.rules.j2
dest: /etc/prometheus/rules.d/base.rules
owner: root
group: root
......
#jinja2: variable_start_string:'{{{', variable_end_string: '}}}'
{{{ ansible_managed | comment }}}
groups:
- name: Health
rules:
......@@ -10,6 +13,21 @@ groups:
description: 'Instance {{ $labels.instance }} is down'
summary: 'Instance is down'
{#
  "Prometheus Down" alert: one absent() term per host listed in
  alertmanager_prometheus_remote_hosts, excluding the host that renders this
  template. The terms are OR-ed together into a single expression.

  The rendering host is filtered out BEFORE the emptiness check, so a list that
  only contains this host (or is empty) emits no rule at all instead of a rule
  with an empty `expr`.

  Each expression line ends in a variable tag rather than a block tag: with
  trim_blocks enabled (the default of Ansible's template module) the newline
  following a block tag is removed, which would join the absent() terms and the
  following `labels:` key onto one line.
#}
{% set remote_prometheus_hosts = alertmanager_prometheus_remote_hosts | reject('equalto', inventory_hostname) | list %}
{% if remote_prometheus_hosts %}
  - alert: Prometheus Down
    for: 5m
    expr: |
{% for host in remote_prometheus_hosts %}
      absent(up{job="prometheus",monitor="{{{ host }}}"}){{{ '' if loop.last else ' OR' }}}
{% endfor %}
    labels:
      severity: critical
    annotations:
      description: 'Prometheus instance {{ $labels.monitor }} is down'
      summary: 'Prometheus is down'
{% endif %}
- name: Disk Usage
rules:
- alert: Low Disk Space
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment