Commit 7c4fcb1b authored by Tharyrok's avatar Tharyrok
Browse files

Merge branch '122-federation-prometheus-remote-storage' into 'main'

Resolve "Fédération Prometheus / remote storage"

Closes #122

See merge request Neutrinet/infra!171
parents 55081f3a a3718c74
Pipeline #776 passed with stage
in 4 minutes and 40 seconds
telegraf_prometheus_client_listen_address: localhost telegraf_prometheus_client_listen_address: localhost
prometheus_version: 2.33.3 prometheus_version: 2.33.3
prometheus_telegraf_hosts: "{{ groups.all }}"
prometheus_username: prometheus prometheus_username: prometheus
prometheus_password: "{{ vault_prometheus_password }}" prometheus_password: "{{ vault_prometheus_password }}"
prometheus_federation_hosts: [] prometheus_promscale_enabled: false
prometheus_storage_retention: 2d prometheus_promscale_username: promscale
prometheus_promscale_password: "{{ vault_prometheus_promscale_password }}"
prometheus_remote_write_url: https://promscale.neutrinet.be/api/v1/write
prometheus_storage_retention: 1w
prometheus_storage_path: /var/lib/prometheus
promscale_version: 0.10.0
promscale_extension_version: 0.3.2
promscale_retention: 1y
promscale_postgresql_password: "{{ vault_promscale_postgresql_password }}"
letsencrypt_dns_gandi_api_key: "{{ vault_dns_gandi_api_key }}" letsencrypt_dns_gandi_api_key: "{{ vault_dns_gandi_api_key }}"
letsencrypt_dns_gandi_sharing_id: "{{ vault_dns_gandi_sharing_id }}" letsencrypt_dns_gandi_sharing_id: "{{ vault_dns_gandi_sharing_id }}"
......
$ANSIBLE_VAULT;1.1;AES256 $ANSIBLE_VAULT;1.1;AES256
38383461626666646237303639623730393965383035653331333032313735663032356330326239 62323864633432363831313165333436323961356362613931323162373861333333393134383332
6663333734643430336135326164303364613639666564320a643039313761343566306137356565 6335353639653161363332316636643265306462343931340a643864316339313239396264643830
32376636373163313565333866343730353362626135616633616639636437323136373237323837 38646537373839666133376564306138373330633135323162393935636232343764373931363134
6264376333326461320a376239386266356130626365366235313362323231353338626263633131 6338623833323237330a386130666336363932333930303931316639306634346562393561303439
35376139363639616636333135396563653361303838386634316464623132303932613538643363 39663539616664633761396539316364613834306532643134653537613431393634376132613365
62303134323439383338326264383738376130383839373362663633646263393031333566386131 37303033316135343364653134356561633330666664623639653566353862303832626361623334
66316266386564396132393961653939383235643538333732383537663064353266353237623230 32613535313832613330613238623763303236346437636539653936303637616564333566633032
36396366376630383461633338363937663530313434383938393038653164323063626339336637 30333435633738396136623430623035623837646464646532383361333261306463386636356336
38333739343630366564366133613334323735396365666637613035656634346233336638656130 63333765613839343230653535393065306365326264366631663933613837323832656237333363
37323033643861313435313432656630373231386563613631306563316565343461626330613832 38353364656265313937333733396439656162653239346365613630343734656238383635653363
65633464653037333636653833656462393736616430626338373931313931643239666335303662 35643535333232613162396131353031383165643864353363313664373166623032356231326537
64666662643365353864303961383064633331316661633734333762323731356263303262376137 30616231326535343562356661353265663261363262343062373938646564363965356330623337
6330 64363835363762303633363236643830396165643732303963383938613161323032656666346266
61646662346131623664663264386331633031323532623530323764623731653439663366393535
35326338326565666163313066396164653936336461346638346132316136383233636133323832
31326631646534623763626366653036633832333233396133323438306238666537616364316135
31663334316533613361613039656239636365383635656336363661343431626562386634366564
30316664663437363565373836653535623963323964363862323936663462643739353636356634
39663633356362616634653535376466326162653536656661653832633164366261356132633134
39623231393832326636
prometheus_federation_hosts:
- probe-01.first.neutri.net
- probe-02.ovh.neutri.net
- man.patata.louise.neutri.net
prometheus_telegraf_hosts: prometheus_telegraf_hosts:
- "{{ inventory_hostname }}" - "{{ inventory_hostname }}"
- vpn.neutri.net - vpn.neutri.net
- topi.louise.neutri.net - topi.louise.neutri.net
- nam.louise.neutri.net - nam.louise.neutri.net
- bour.louise.neutri.net - bour.louise.neutri.net
prometheus_storage_retention: 90d
prometheus_promscale_enabled: true
prometheus_remote_write_url: https://localhost:9201/write
promscale_memory_limit: 1GB
timescaledb_memory_limit: 2GB
timescaledb_cpu_limit: 2
letsencrypt_domains_gandi: letsencrypt_domains_gandi:
- neutrinet.be - neutrinet.be
...@@ -29,6 +31,14 @@ haproxy: ...@@ -29,6 +31,14 @@ haproxy:
healthcheck_url: GET /-/healthy healthcheck_url: GET /-/healthy
username: "{{ prometheus_username }}" username: "{{ prometheus_username }}"
password: "{{ prometheus_password }}" password: "{{ prometheus_password }}"
- hostname: promscale.neutrinet.be
target:
- 127.0.0.1
target_port: 9201
healthcheck: true
healthcheck_url: GET /healthz
username: "{{ prometheus_promscale_username }}"
password: "{{ prometheus_promscale_password }}"
probe_http_certs: probe_http_certs:
- https://neutrinet.be:443 - https://neutrinet.be:443
......
prometheus_telegraf_hosts:
- "{{ inventory_hostname }}"
- vpn.neutri.net
- topi.louise.neutri.net
- nam.louise.neutri.net
- bour.louise.neutri.net
prometheus_promscale_enabled: true
prometheus_remote_write_url: http://localhost:9201/write
prometheus_storage_path: /media/data/prometheus
promscale_postgresql_database: prometheus
postgresql_patroni_hosts: []
postgresql_host: "{{ inventory_hostname }}"
promscale_memory_limit: 4GB
timescaledb_memory_limit: 4GB
timescaledb_cpu_limit: 4
letsencrypt_domains_gandi:
- neutrinet.be
- ovh.neutri.net
haproxy:
- hostname: grafana.neutrinet.be
target:
- 127.0.0.1
target_port: 3000
healthcheck: true
healthcheck_url: GET /api/health
- hostname: prometheus.neutrinet.be
target:
- 127.0.0.1
target_port: 9090
healthcheck: true
healthcheck_url: GET /-/healthy
username: "{{ prometheus_promscale_username }}"
password: "{{ prometheus_promscale_password }}"
probe_http_certs:
- https://neutrinet.be:443
- https://labriqueinter.net:443
- https://internetcu.be:443
- https://api.neutrinet.be:443
- https://grafana.neutrinet.be:443
- https://support.neutrinet.be:443
- https://mail.neutri.net:443
- https://promscale.htz.neutri.net:443
- https://probe-01.first.neutri.net:443
- https://probe-02.ovh.neutri.net:443
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
[hetzner] [hetzner]
mail.htz.neutri.net mail.htz.neutri.net
monitoring.htz.neutri.net
runner.htz.neutri.net runner.htz.neutri.net
pbs-01.htz.neutri.net pbs-01.htz.neutri.net
...@@ -34,6 +33,7 @@ probe-01.first.neutri.net ...@@ -34,6 +33,7 @@ probe-01.first.neutri.net
[ovhcloud] [ovhcloud]
probe-02.ovh.neutri.net probe-02.ovh.neutri.net
storage-01.ovh.neutri.net
[louise:children] [louise:children]
louise_baremetal louise_baremetal
...@@ -133,12 +133,12 @@ backoffice.patata.louise.neutri.net ...@@ -133,12 +133,12 @@ backoffice.patata.louise.neutri.net
[nftables] [nftables]
mail.htz.neutri.net mail.htz.neutri.net
monitoring.htz.neutri.net
runner.htz.neutri.net runner.htz.neutri.net
probe-01.first.neutri.net probe-01.first.neutri.net
probe-02.ovh.neutri.net probe-02.ovh.neutri.net
edge-01.louise.neutri.net edge-01.louise.neutri.net
edge-02.louise.neutri.net edge-02.louise.neutri.net
storage-01.ovh.neutri.net
[pfsense] [pfsense]
pfsense-01.louise.neutri.net pfsense-01.louise.neutri.net
...@@ -150,19 +150,22 @@ ansible_python_interpreter=/usr/local/bin/python3.7 ...@@ -150,19 +150,22 @@ ansible_python_interpreter=/usr/local/bin/python3.7
[probe] [probe]
probe-01.first.neutri.net probe-01.first.neutri.net
probe-02.ovh.neutri.net probe-02.ovh.neutri.net
monitoring.htz.neutri.net storage-01.ovh.neutri.net
[prometheus] [prometheus]
probe-01.first.neutri.net probe-01.first.neutri.net
probe-02.ovh.neutri.net probe-02.ovh.neutri.net
monitoring.htz.neutri.net
man.patata.louise.neutri.net man.patata.louise.neutri.net
storage-01.ovh.neutri.net
[alertmanager] [alertmanager]
monitoring.htz.neutri.net storage-01.ovh.neutri.net
[grafana] [grafana]
monitoring.htz.neutri.net storage-01.ovh.neutri.net
[gitlab_runner] [gitlab_runner]
runner.patata.louise.neutri.net runner.patata.louise.neutri.net
[promscale]
storage-01.ovh.neutri.net
...@@ -10,4 +10,3 @@ ...@@ -10,4 +10,3 @@
roles: roles:
- prometheus - prometheus
- haproxy
---
# Déploiement de Prometheus
- hosts: promscale
become: true
pre_tasks:
- name: Mise à jour du cache APT
apt:
update_cache: true
roles:
- promscale
- haproxy
...@@ -15,3 +15,4 @@ ...@@ -15,3 +15,4 @@
- import_playbook: apps/prometheus.yml - import_playbook: apps/prometheus.yml
- import_playbook: apps/alertmanager.yml - import_playbook: apps/alertmanager.yml
- import_playbook: apps/gitlab_runner.yml - import_playbook: apps/gitlab_runner.yml
- import_playbook: apps/promscale.yml
postgresql_patroni_hosts: [] postgresql_patroni_hosts: []
postgresql_db_name: neutrinet postgresql_db_name: neutrinet
postgresql_db_password: neutrinet postgresql_db_password: neutrinet
postgresql_db_role_attributes: ""
postgresql_db_change_owner: false
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
name: acl name: acl
state: present state: present
- name: Création de la base de données PostgreSQL - name: Création de la base de données PostgreSQL {{ postgresql_db_name }}
community.postgresql.postgresql_db: community.postgresql.postgresql_db:
name: "{{ postgresql_db_name }}" name: "{{ postgresql_db_name }}"
state: present state: present
...@@ -20,6 +20,16 @@ ...@@ -20,6 +20,16 @@
name: "{{ postgresql_db_name }}" name: "{{ postgresql_db_name }}"
password: "{{ postgresql_db_password }}" password: "{{ postgresql_db_password }}"
priv: ALL priv: ALL
role_attr_flags: "{{ postgresql_db_role_attributes }}"
become_user: postgres become_user: postgres
delegate_to: "{{ postgresql_patroni_primary_host | default(inventory_hostname) }}" delegate_to: "{{ postgresql_patroni_primary_host | default(inventory_hostname) }}"
no_log: true no_log: true
- name: Changement du propriétaire de la base de données
community.postgresql.postgresql_db:
db: "{{ postgresql_db_name }}"
owner: "{{ postgresql_db_name }}"
state: present
become_user: postgres
delegate_to: "{{ postgresql_patroni_primary_host | default(inventory_hostname) }}"
when: postgresql_db_change_owner
prometheus_version: 2.33.3 prometheus_version: 2.33.3
prometheus_federation_hosts:
- prometheus.example.com
prometheus_telegraf_hosts: "{{ groups.all }}" prometheus_telegraf_hosts: "{{ groups.all }}"
prometheus_username: prometheus prometheus_username: prometheus
prometheus_password: neutrinet prometheus_password: neutrinet
prometheus_storage_retention: 1y
prometheus_promscale_enabled: true
prometheus_promscale_username: promscale
prometheus_promscale_password: neutrinet
prometheus_remote_write_url: http://localhost:9201/write
prometheus_storage_retention: 1w
prometheus_storage_path: /var/lib/prometheus
- name: reload systemd
systemd:
daemon_reload: true
- name: restart prometheus - name: restart prometheus
service: systemd:
name: prometheus name: prometheus
daemon_reload: true
state: restarted state: restarted
- name: reload prometheus - name: reload prometheus
......
...@@ -14,3 +14,11 @@ haproxy: ...@@ -14,3 +14,11 @@ haproxy:
healthcheck_url: GET /-/healthy healthcheck_url: GET /-/healthy
username: "{{ prometheus_username }}" username: "{{ prometheus_username }}"
password: "{{ prometheus_password }}" password: "{{ prometheus_password }}"
- hostname: promscale.neutrinet.be
target:
- 127.0.0.1
target_port: 9201
healthcheck: true
healthcheck_url: GET /healthz
username: "{{ prometheus_promscale_username }}"
password: "{{ prometheus_promscale_password }}"
...@@ -8,7 +8,7 @@ platforms: ...@@ -8,7 +8,7 @@ platforms:
- name: buster-prometheus-molecule - name: buster-prometheus-molecule
box: debian/buster64 box: debian/buster64
cpu: 2 cpu: 2
memory: 512 memory: 2048
interfaces: interfaces:
- network_name: private_network - network_name: private_network
type: dhcp type: dhcp
......
...@@ -10,3 +10,4 @@ ...@@ -10,3 +10,4 @@
roles: roles:
- telegraf - telegraf
- postgres_standalone
- import_tasks: user.yml - name: Création de l'utilisateur
import_tasks: user.yml
tags: ['user'] tags: ['user']
- import_tasks: prometheus.yml - name: Installation de Prometheus
import_tasks: prometheus.yml
tags: ['prometheus'] tags: ['prometheus']
- import_tasks: cleanup.yml - name: Nettoyage des anciennes versions
import_tasks: cleanup.yml
tags: ['cleanup'] tags: ['cleanup']
...@@ -79,7 +79,6 @@ ...@@ -79,7 +79,6 @@
group: root group: root
mode: "u=rw,go=r" mode: "u=rw,go=r"
notify: notify:
- reload systemd
- restart prometheus - restart prometheus
- name: Activation de Prometheus au démarrage - name: Activation de Prometheus au démarrage
......
...@@ -13,6 +13,30 @@ global: ...@@ -13,6 +13,30 @@ global:
external_labels: external_labels:
monitor: {{ ansible_fqdn }} monitor: {{ ansible_fqdn }}
remote_write:
- url: "{{ prometheus_remote_write_url }}"
{% if not prometheus_promscale_enabled %}
{# Unless we write to localhost, we need to authenticate to push metrics to remote storage #}
basic_auth:
username: {{ prometheus_promscale_username | to_json }}
password: {{ prometheus_promscale_password | string | to_json }}
{% endif %}
remote_timeout: 30s
queue_config:
capacity: 10000
max_samples_per_send: 3000
batch_send_deadline: 10s
min_shards: 4
max_shards: 200
min_backoff: 100ms
max_backoff: 10s
{% if prometheus_promscale_enabled %}
remote_read:
- url: "http://localhost:9201/read"
read_recent: true
{% endif %}
# Alertmanager configuration # Alertmanager configuration
alerting: alerting:
alertmanagers: alertmanagers:
...@@ -38,26 +62,13 @@ scrape_configs: ...@@ -38,26 +62,13 @@ scrape_configs:
- targets: - targets:
- localhost:9090 - localhost:9090
{% for host in prometheus_federation_hosts %} {% if prometheus_promscale_enabled %}
# Grab federation metrics from Prometheus instance {{ host }} # Grab metrics about promscale
- job_name: federation-{{ host | replace('.', '-') }} - job_name: promscale
honor_labels: true
metrics_path: /federate
params:
'match[]':
- '{job="prometheus"}'
- '{job=~"telegraf-.*"}'
{% if hostvars[host].prometheus_username is defined and hostvars[host].prometheus_password is defined %}
basic_auth:
username: {{ hostvars[host].prometheus_username | to_json }}
password: {{ hostvars[host].prometheus_password | string | to_json }}
{% endif %}
scheme: https
static_configs: static_configs:
- targets: - targets:
- "{{ host }}" - localhost:9201
{% endif %}
{% endfor %}
{% for host in prometheus_telegraf_hosts %} {% for host in prometheus_telegraf_hosts %}
# Grab telegraf metrics from {{ host }} # Grab telegraf metrics from {{ host }}
......
...@@ -10,14 +10,17 @@ User=prometheus ...@@ -10,14 +10,17 @@ User=prometheus
Group=prometheus Group=prometheus
ExecReload=/bin/kill -HUP $MAINPID ExecReload=/bin/kill -HUP $MAINPID
ExecStart=/usr/local/bin/prometheus \ ExecStart=/usr/local/bin/prometheus \
--storage.tsdb.path=/var/lib/prometheus \ --storage.tsdb.path={{ prometheus_storage_path }} \
--storage.tsdb.retention.time={{ prometheus_storage_retention }} \ --storage.tsdb.retention.time={{ prometheus_storage_retention }} \
--web.listen-address='localhost:9090' \ --web.listen-address='localhost:9090' \
{% if prometheus_promscale_enabled %}
--web.enable-remote-write-receiver \
{% endif %}
--config.file=/etc/prometheus/prometheus.yml --config.file=/etc/prometheus/prometheus.yml
Restart=always Restart=always
# Systemd hardening # Systemd hardening
ReadWritePaths=/var/lib/prometheus ReadWritePaths={{ prometheus_storage_path }}
CapabilityBoundingSet= CapabilityBoundingSet=
LimitNOFILE=65000 LimitNOFILE=65000
......
promscale_version: 0.10.0
promscale_extension_version: 0.3.2
promscale_retention: 1y
promscale_memory_limit: 512MB
postgresql_major_version: 13
promscale_postgresql_database: promscale
promscale_postgresql_password: neutrinet
timescaledb_memory_limit: 512MB
timescaledb_cpu_limit: 1
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment