aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIsmaël Bouya <ismael.bouya@normalesup.org>2018-07-08 21:51:30 +0200
committerIsmaël Bouya <ismael.bouya@normalesup.org>2018-07-10 10:26:39 +0200
commitb5305b5cad5cbb0a2c072b29f2d4dc05126c39d4 (patch)
tree8fb1844be8b9e68d62ee2ede54c1bb39d27d4c30
parent6962463657db999c33f1aabe60e0567be218918d (diff)
downloadPuppet-b5305b5cad5cbb0a2c072b29f2d4dc05126c39d4.tar.gz
Puppet-b5305b5cad5cbb0a2c072b29f2d4dc05126c39d4.tar.zst
Puppet-b5305b5cad5cbb0a2c072b29f2d4dc05126c39d4.zip
Add postgresql monitoring
-rw-r--r--modules/profile/files/monitoring/check_last_file_date31
-rw-r--r--modules/profile/files/monitoring/check_postgres_replication35
-rw-r--r--modules/profile/manifests/monitoring/commands.pp40
-rw-r--r--modules/profile/manifests/monitoring/local_service.pp16
-rw-r--r--modules/profile/manifests/monitoring/services.pp24
-rw-r--r--modules/profile/manifests/postgresql.pp8
-rw-r--r--modules/profile/manifests/postgresql/backup_dump.pp9
-rw-r--r--modules/profile/manifests/postgresql/backup_pgbouncer.pp10
-rw-r--r--modules/profile/manifests/postgresql/master.pp10
-rw-r--r--modules/profile/manifests/postgresql/ssl.pp1
10 files changed, 149 insertions, 35 deletions
diff --git a/modules/profile/files/monitoring/check_last_file_date b/modules/profile/files/monitoring/check_last_file_date
new file mode 100644
index 0000000..8eabb57
--- /dev/null
+++ b/modules/profile/files/monitoring/check_last_file_date
@@ -0,0 +1,31 @@
#!/bin/bash
#
# Naemon/Nagios plugin: checks that the most recently modified entry found
# directly inside a directory is newer than a given number of hours.
#
# Usage: check_last_file_date BASE_PATH HOURS [AS_USER]
#   BASE_PATH  directory whose direct children are inspected
#   HOURS      maximum accepted age of the newest entry, in hours
#   AS_USER    optional user to run find as, through sudo
#
# Exit status:
#   0 (OK)        the newest entry is more recent than HOURS hours ago
#   2 (CRITICAL)  the newest entry is older than that
#   3 (UNKNOWN)   bad arguments, or nothing could be listed in BASE_PATH

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

base_path=$1
hours=$2
as_user=$3

# An empty HOURS would still be accepted by date, so reject it explicitly.
if [ -z "$base_path" ] || [ -z "$hours" ]; then
  echo "UNKNOWN: Usage: $0 base_path hours [as_user]"
  exit $STATE_UNKNOWN
fi

# NOTE: when a sudoers rule authorizes this find invocation, sudo compares the
# arguments literally, so the argument list below must stay in sync with it.
find_cmd=(find "$base_path" -mindepth 1 -maxdepth 1 -printf "%T@\n")
if [ -n "$as_user" ]; then
  # -n: fail right away instead of waiting for a password prompt.
  find_cmd=(sudo -n -u "$as_user" "${find_cmd[@]}")
fi

# %T@ prints epoch seconds with a fractional part; compare them as numbers.
last_date=$("${find_cmd[@]}" 2>/dev/null | LC_ALL=C sort -n | tail -n 1)

if [ -z "$last_date" ]; then
  echo "UNKNOWN: Could not read folder"
  exit $STATE_UNKNOWN
fi

# Round to whole seconds. The C locale is limited to this printf call so that
# the "." decimal separator is parsed whatever the ambient locale is.
last_date=$(LC_ALL=C printf "%.0f" "$last_date")

if ! min_date=$(date -d "$hours hours ago" "+%s" 2>/dev/null); then
  echo "UNKNOWN: Invalid number of hours: $hours"
  exit $STATE_UNKNOWN
fi

if [ "$min_date" -lt "$last_date" ]; then
  echo "OK: Last backup $(date -d @$last_date)"
  exit $STATE_OK
else
  echo "CRITICAL: Last backup $(date -d @$last_date)"
  exit $STATE_CRITICAL
fi
diff --git a/modules/profile/files/monitoring/check_postgres_replication b/modules/profile/files/monitoring/check_postgres_replication
new file mode 100644
index 0000000..163c68a
--- /dev/null
+++ b/modules/profile/files/monitoring/check_postgres_replication
@@ -0,0 +1,35 @@
#!/bin/bash
#
# Naemon/Nagios plugin: reports the replay lag recorded in
# pg_stat_replication for a given replication user.
#
# Usage: check_postgres_replication USER HOST PORT
#   USER  role name matched against pg_stat_replication.usename
#   HOST  value given to psql -h
#   PORT  value given to psql -p
#
# Exit status:
#   0 (OK)        lag below WARNING_LAG seconds
#   1 (WARNING)   lag below CRITICAL_LAG seconds
#   2 (CRITICAL)  lag of CRITICAL_LAG seconds or more
#   3 (UNKNOWN)   bad arguments, psql failure, or no replication row for USER

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

# Thresholds, in seconds.
WARNING_LAG=5
CRITICAL_LAG=10

user=$1
host=$2
port=$3

if [[ -z "$user" || -z "$host" || -z "$port" ]]; then
  echo "UNKNOWN:Usage: $0 user host port"
  exit $STATE_UNKNOWN
fi

# Double any single quote so the role name cannot break out of the literal.
quote="'"
sql_user=${user//$quote/$quote$quote}

# MAX() folds several walsender rows for the same user into the worst lag,
# and still yields an empty result when there is no row at all.
query="SELECT MAX(COALESCE(EXTRACT(EPOCH FROM replay_lag),0)) FROM pg_stat_replication WHERE usename='$sql_user'"

# -X: do not read a psqlrc file that could alter the output format.
lag=$(psql -X -h "$host" -p "$port" -A -t -c "$query" 2>/dev/null)
exit_code=$?

if [[ $exit_code -ne 0 ]]; then
  echo "UNKNOWN:Impossible to run psql command"
  exit $STATE_UNKNOWN
elif [[ -z "$lag" ]]; then
  echo "UNKNOWN:No replication found for $user"
  exit $STATE_UNKNOWN
fi

output="Replication lag for $user is ${lag}s"

# Round to whole seconds for the integer comparisons below. The C locale is
# limited to this printf call; a value printf cannot parse is reported as
# UNKNOWN rather than compared.
if ! lag_seconds=$(LC_ALL=C printf "%.0f" "$lag" 2>/dev/null); then
  echo "UNKNOWN:Unexpected lag value returned by psql"
  exit $STATE_UNKNOWN
fi

if [[ $lag_seconds -lt $WARNING_LAG ]]; then
  echo "OK:$output"
  exit $STATE_OK
elif [[ $lag_seconds -lt $CRITICAL_LAG ]]; then
  echo "WARNING:$output"
  exit $STATE_WARNING
else
  echo "CRITICAL:$output"
  exit $STATE_CRITICAL
fi
diff --git a/modules/profile/manifests/monitoring/commands.pp b/modules/profile/manifests/monitoring/commands.pp
index a25f7b3..4ccc816 100644
--- a/modules/profile/manifests/monitoring/commands.pp
+++ b/modules/profile/manifests/monitoring/commands.pp
@@ -3,25 +3,24 @@ class profile::monitoring::commands inherits profile::monitoring {
3 3
4 file { "/etc/naemon/monitoring-plugins": 4 file { "/etc/naemon/monitoring-plugins":
5 ensure => "directory", 5 ensure => "directory",
6 owner => "naemon", 6 owner => "root",
7 group => "naemon",
8 mode => "0700",
9 }
10
11 file { "/etc/naemon/monitoring-plugins/check_command":
12 ensure => "present",
13 owner => "naemon",
14 group => "naemon", 7 group => "naemon",
15 mode => "0700", 8 mode => "0755",
16 source => "puppet:///modules/profile/monitoring/check_command",
17 } 9 }
18 10
19 file { "/etc/naemon/monitoring-plugins/check_md_raid": 11 [
20 ensure => "present", 12 "check_command",
21 owner => "naemon", 13 "check_md_raid",
22 group => "naemon", 14 "check_postgres_replication",
23 mode => "0700", 15 "check_last_file_date",
24 source => "puppet:///modules/profile/monitoring/check_md_raid", 16 ].each |$file| {
17 file { "/etc/naemon/monitoring-plugins/$file":
18 ensure => "present",
19 owner => "root",
20 group => "naemon",
21 mode => "0755",
22 source => "puppet:///modules/profile/monitoring/$file",
23 }
25 } 24 }
26 25
27 Nagios_command { 26 Nagios_command {
@@ -48,10 +47,17 @@ class profile::monitoring::commands inherits profile::monitoring {
48 "check_ntp": 47 "check_ntp":
49 command_line => '$USER1$/check_ntp_time -H 0.arch.pool.ntp.org'; 48 command_line => '$USER1$/check_ntp_time -H 0.arch.pool.ntp.org';
50 "check_md_raid": 49 "check_md_raid":
51 command_line => '$USER2$/check_md_raid'; 50 command_line => '$USER2$/check_md_raid',
51 require => File["/etc/naemon/monitoring-plugins/check_md_raid"];
52 "check_command_output": 52 "check_command_output":
53 command_line => '$USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$', 53 command_line => '$USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$',
54 require => File["/etc/naemon/monitoring-plugins/check_command"]; 54 require => File["/etc/naemon/monitoring-plugins/check_command"];
55 "check_postgresql_replication":
56 command_line => '/usr/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$"',
57 require => File["/etc/naemon/monitoring-plugins/check_postgres_replication"];
58 "check_last_file_date":
59 command_line => '$USER2$/check_last_file_date "$ARG1$" "$ARG2$" "$ARG3$"',
60 require => File["/etc/naemon/monitoring-plugins/check_last_file_date"],
55 } 61 }
56 62
57 unless empty($naemon_url) { 63 unless empty($naemon_url) {
diff --git a/modules/profile/manifests/monitoring/local_service.pp b/modules/profile/manifests/monitoring/local_service.pp
index 3b39d1f..0caf72e 100644
--- a/modules/profile/manifests/monitoring/local_service.pp
+++ b/modules/profile/manifests/monitoring/local_service.pp
@@ -1,4 +1,5 @@
1define profile::monitoring::local_service ( 1define profile::monitoring::local_service (
2 Optional[Hash] $sudos = {},
2 Optional[Hash] $common = {}, 3 Optional[Hash] $common = {},
3 Optional[Hash] $master = {}, 4 Optional[Hash] $master = {},
4 Optional[Hash] $local = {}, 5 Optional[Hash] $local = {},
@@ -6,6 +7,15 @@ define profile::monitoring::local_service (
6 $service_description = $title 7 $service_description = $title
7 $real_hostname = lookup("base_installation::real_hostname") 8 $real_hostname = lookup("base_installation::real_hostname")
8 9
10 Nagios_service {
11 ensure => "present",
12 owner => "naemon",
13 group => "naemon",
14 notify => Service["naemon"],
15 before => Service["naemon"],
16 require => File["/etc/naemon"],
17 }
18
9 $service_generic = { 19 $service_generic = {
10 active_checks_enabled => "1", 20 active_checks_enabled => "1",
11 check_freshness => "0", 21 check_freshness => "0",
@@ -35,6 +45,12 @@ define profile::monitoring::local_service (
35 retry_interval => "1", 45 retry_interval => "1",
36 }) 46 })
37 47
48 $sudos.each |$sudo_name, $content| {
49 sudo::conf { $sudo_name:
50 content => $content,
51 before => Nagios_service[$service_description],
52 }
53 }
38 54
39 [true, false].each |$services_for_master| { 55 [true, false].each |$services_for_master| {
40 if $services_for_master { 56 if $services_for_master {
diff --git a/modules/profile/manifests/monitoring/services.pp b/modules/profile/manifests/monitoring/services.pp
index 6e59ab1..b20a352 100644
--- a/modules/profile/manifests/monitoring/services.pp
+++ b/modules/profile/manifests/monitoring/services.pp
@@ -1,21 +1,5 @@
1class profile::monitoring::services { 1class profile::monitoring::services {
2 2
3 Nagios_service {
4 ensure => "present",
5 owner => "naemon",
6 group => "naemon",
7 notify => Service["naemon"],
8 before => Service["naemon"],
9 require => File["/etc/naemon"],
10 }
11
12 sudo::conf {
13 default:
14 sudo_file_name => "naemon";
15 'naemon-fail2ban':
16 content => "naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping";
17 }
18
19 profile::monitoring::local_service { 3 profile::monitoring::local_service {
20 "Size on root partition": 4 "Size on root partition":
21 local => { 5 local => {
@@ -34,9 +18,11 @@ class profile::monitoring::services {
34 check_command => "check_local_swap!20!10", 18 check_command => "check_local_swap!20!10",
35 }; 19 };
36 "fail2ban is active": 20 "fail2ban is active":
37 local => { 21 sudos => {
22 "naemon-fail2ban" => "naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping",
23 },
24 local => {
38 check_command => "check_command_output!fail2ban-client ping!pong!-r root", 25 check_command => "check_command_output!fail2ban-client ping!pong!-r root",
39 require => Sudo::Conf["naemon-fail2ban"],
40 }; 26 };
41 "NTP is activated and working": 27 "NTP is activated and working":
42 local => { 28 local => {
@@ -50,4 +36,6 @@ class profile::monitoring::services {
50 check_command => "check_md_raid", 36 check_command => "check_md_raid",
51 }; 37 };
52 } 38 }
39
40 Profile::Monitoring::Local_service <| |>
53} 41}
diff --git a/modules/profile/manifests/postgresql.pp b/modules/profile/manifests/postgresql.pp
index 97ce572..edd6ea6 100644
--- a/modules/profile/manifests/postgresql.pp
+++ b/modules/profile/manifests/postgresql.pp
@@ -28,5 +28,13 @@ class profile::postgresql (
28 28
29 profile::postgresql::base_pg_hba_rules { "default": } 29 profile::postgresql::base_pg_hba_rules { "default": }
30 30
# Virtual monitoring service (declared with "@", so it only takes effect where
# it is realized by a collector). It declares the sudoers entry letting naemon
# run the psql query as postgres, and a check looking for "public" in the
# command output.
@profile::monitoring::local_service { "Databases are present in postgresql":
  sudos => {
    # Single-quoted so that the backslash-escaped spaces expected by sudoers
    # stay literal: in a double-quoted Puppet string "\ " is an unrecognized
    # escape sequence and triggers a warning.
    "naemon-postgresql-database-public" => 'naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace',
  },
  local => {
    check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace'!public!-r postgres",
  },
}
31} 39}
32 40
diff --git a/modules/profile/manifests/postgresql/backup_dump.pp b/modules/profile/manifests/postgresql/backup_dump.pp
index 53fb20e..5e469c5 100644
--- a/modules/profile/manifests/postgresql/backup_dump.pp
+++ b/modules/profile/manifests/postgresql/backup_dump.pp
@@ -57,4 +57,13 @@ define profile::postgresql::backup_dump (
57 }, 57 },
58 ] 58 ]
59 } 59 }
60
# Virtual monitoring service checking the age of the newest entry in
# $pg_backup_path (threshold passed to the check: 10 hours). The sudoers entry
# lets naemon run, as $pg_user, the find invocation used by
# check_last_file_date; its argument list has to match that script's.
# NOTE(review): the sudoers entry name is constant, so declaring this define
# more than once would presumably clash -- confirm against callers.
@profile::monitoring::local_service { "Last postgresql dump in $pg_backup_path is not too old":
  sudos => {
    "naemon-postgresql-dumps" => "naemon ALL=($pg_user) NOPASSWD: /usr/bin/find $pg_backup_path -mindepth 1 -maxdepth 1 -printf %T@\\n",
  },
  local => {
    check_command => "check_last_file_date!$pg_backup_path!10!$pg_user",
  },
}
60} 69}
diff --git a/modules/profile/manifests/postgresql/backup_pgbouncer.pp b/modules/profile/manifests/postgresql/backup_pgbouncer.pp
index 45b8ed5..c82eefd 100644
--- a/modules/profile/manifests/postgresql/backup_pgbouncer.pp
+++ b/modules/profile/manifests/postgresql/backup_pgbouncer.pp
@@ -48,6 +48,16 @@ define profile::postgresql::backup_pgbouncer (
48 content => "${pg_infos[pgbouncer_dbname]} = host=$pg_path$pg_port user=${pg_infos[dbuser]} dbname=${pg_infos[dbname]}", 48 content => "${pg_infos[pgbouncer_dbname]} = host=$pg_path$pg_port user=${pg_infos[dbuser]} dbname=${pg_infos[dbname]}",
49 } 49 }
50 50
51 # FIXME: current pam configuration requires password for postgres
52 # @profile::monitoring::local_service { "Database ${pg_infos[pgbouncer_dbname]} is available in pgbouncer":
53 # sudos => {
54 # "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace ${pg_infos[pgbouncer_dbname]}"
55 # },
56 # local => {
57 # check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace' ${pg_infos[pgbouncer_dbname]}!public!-r postgres",
58 # }
59 # }
60
51 # pg_hba for accessed cluster 61 # pg_hba for accessed cluster
52 postgresql::server::pg_hba_rule { "$pg_backup_host - local access as ${pg_infos[dbuser]} user": 62 postgresql::server::pg_hba_rule { "$pg_backup_host - local access as ${pg_infos[dbuser]} user":
53 description => "Allow local access to ${pg_infos[dbuser]} user", 63 description => "Allow local access to ${pg_infos[dbuser]} user",
diff --git a/modules/profile/manifests/postgresql/master.pp b/modules/profile/manifests/postgresql/master.pp
index 02315a6..e775eb4 100644
--- a/modules/profile/manifests/postgresql/master.pp
+++ b/modules/profile/manifests/postgresql/master.pp
@@ -59,5 +59,15 @@ define profile::postgresql::master (
59 handle_slot => true, 59 handle_slot => true,
60 add_self_role => true, 60 add_self_role => true,
61 } 61 }
62
# Virtual monitoring service checking the replication lag for $backup_host.
# The sudoers entry lets naemon run check_postgres_replication as postgres
# with exactly these arguments; sudo compares them literally, so the
# arguments of check_command below must be spelled the same way (no trailing
# slash on the socket directory).
@profile::monitoring::local_service { "Postgresql replication for $backup_host is up to date":
  sudos => {
    "naemon-postgresql-replication-$backup_host" => "naemon ALL=(postgres) NOPASSWD: /etc/naemon/monitoring-plugins/check_postgres_replication $backup_host /run/postgresql 5432",
  },
  local => {
    check_command => "check_postgresql_replication!$backup_host!/run/postgresql!5432",
  },
}
62 } 72 }
63} 73}
diff --git a/modules/profile/manifests/postgresql/ssl.pp b/modules/profile/manifests/postgresql/ssl.pp
index dc56c0b..9b0a95c 100644
--- a/modules/profile/manifests/postgresql/ssl.pp
+++ b/modules/profile/manifests/postgresql/ssl.pp
@@ -79,4 +79,5 @@ define profile::postgresql::ssl (
79 } 79 }
80 } 80 }
81 81
82 # FIXME: add monitoring for ssl
82} 83}