diff options
author | Ismaël Bouya <ismael.bouya@normalesup.org> | 2018-07-08 21:51:30 +0200 |
---|---|---|
committer | Ismaël Bouya <ismael.bouya@normalesup.org> | 2018-07-10 10:26:39 +0200 |
commit | b5305b5cad5cbb0a2c072b29f2d4dc05126c39d4 (patch) | |
tree | 8fb1844be8b9e68d62ee2ede54c1bb39d27d4c30 | |
parent | 6962463657db999c33f1aabe60e0567be218918d (diff) | |
download | Puppet-b5305b5cad5cbb0a2c072b29f2d4dc05126c39d4.tar.gz Puppet-b5305b5cad5cbb0a2c072b29f2d4dc05126c39d4.tar.zst Puppet-b5305b5cad5cbb0a2c072b29f2d4dc05126c39d4.zip |
Add postgresql monitoring
-rw-r--r-- | modules/profile/files/monitoring/check_last_file_date | 31 | ||||
-rw-r--r-- | modules/profile/files/monitoring/check_postgres_replication | 35 | ||||
-rw-r--r-- | modules/profile/manifests/monitoring/commands.pp | 40 | ||||
-rw-r--r-- | modules/profile/manifests/monitoring/local_service.pp | 16 | ||||
-rw-r--r-- | modules/profile/manifests/monitoring/services.pp | 24 | ||||
-rw-r--r-- | modules/profile/manifests/postgresql.pp | 8 | ||||
-rw-r--r-- | modules/profile/manifests/postgresql/backup_dump.pp | 9 | ||||
-rw-r--r-- | modules/profile/manifests/postgresql/backup_pgbouncer.pp | 10 | ||||
-rw-r--r-- | modules/profile/manifests/postgresql/master.pp | 10 | ||||
-rw-r--r-- | modules/profile/manifests/postgresql/ssl.pp | 1 |
10 files changed, 149 insertions, 35 deletions
diff --git a/modules/profile/files/monitoring/check_last_file_date b/modules/profile/files/monitoring/check_last_file_date new file mode 100644 index 0000000..8eabb57 --- /dev/null +++ b/modules/profile/files/monitoring/check_last_file_date | |||
@@ -0,0 +1,31 @@ | |||
#!/bin/bash
#
# Naemon/Nagios plugin: checks that the most recently modified entry located
# directly inside a directory is younger than a given number of hours.
#
# Usage: check_last_file_date <base_path> <hours> [as_user]
#   base_path  directory whose direct children are inspected
#   hours      maximum accepted age (in hours) of the newest entry
#   as_user    optional; when given, find is run through "sudo -u <as_user>"
#
# Exit codes:
#   0 (OK)       the newest entry is younger than <hours> hours
#   2 (CRITICAL) the newest entry is too old
#   3 (UNKNOWN)  bad arguments, or the folder is empty / could not be read

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

base_path=$1
hours=$2
as_user=$3

# Without a path, find would silently scan the current directory; without a
# delay, the threshold computed below would be meaningless.
if [ -z "$base_path" ] || [ -z "$hours" ]; then
  echo "UNKNOWN: Usage: $0 <base_path> <hours> [as_user]"
  exit $STATE_UNKNOWN
fi

# NOTE: keep this argument list exactly as is. When run through sudo, the
# sudoers rule whitelists this precise command line, and sudo requires the
# arguments to match the rule exactly.
find_cmd=(find "$base_path" -mindepth 1 -maxdepth 1 -printf "%T@\n")
if [ -n "$as_user" ]; then
  find_cmd=(sudo -u "$as_user" "${find_cmd[@]}")
fi

# Timestamps are "seconds.fraction": sort them numerically, in the C locale so
# that the decimal point is always understood, and keep the newest one.
last_date=$("${find_cmd[@]}" 2>/dev/null | LC_ALL=C sort -n | tail -n 1)

if [ -z "$last_date" ]; then
  echo "UNKNOWN: Could not read folder"
  exit $STATE_UNKNOWN
fi

# Round to whole seconds. LC_ALL=C is scoped to printf only, so that the
# decimal point is parsed whatever the ambient locale is.
if ! last_date=$(LC_ALL=C printf "%.*f" 0 "$last_date" 2>/dev/null); then
  echo "UNKNOWN: Could not parse last modification date"
  exit $STATE_UNKNOWN
fi

if ! min_date=$(date -d "$hours hours ago" "+%s" 2>/dev/null); then
  echo "UNKNOWN: Invalid delay '$hours'"
  exit $STATE_UNKNOWN
fi

if [ "$min_date" -lt "$last_date" ]; then
  echo "OK: Last backup $(date -d "@$last_date")"
  exit $STATE_OK
else
  echo "CRITICAL: Last backup $(date -d "@$last_date")"
  exit $STATE_CRITICAL
fi
diff --git a/modules/profile/files/monitoring/check_postgres_replication b/modules/profile/files/monitoring/check_postgres_replication new file mode 100644 index 0000000..163c68a --- /dev/null +++ b/modules/profile/files/monitoring/check_postgres_replication | |||
@@ -0,0 +1,35 @@ | |||
#!/bin/bash
#
# Naemon/Nagios plugin: reports the replay lag of a PostgreSQL streaming
# replication client, as seen in pg_stat_replication on the server queried.
#
# Usage: check_postgres_replication <replication_user> <host> <port>
#   replication_user  value matched against pg_stat_replication.usename
#   host              passed to "psql -h" (hostname or socket directory)
#   port              passed to "psql -p"
#
# Exit codes:
#   0 (OK)       lag below 5 seconds
#   1 (WARNING)  lag below 10 seconds
#   2 (CRITICAL) lag of 10 seconds or more
#   3 (UNKNOWN)  bad arguments, psql failure, or no replication row for the user

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

user=$1
host=$2
port=$3

if [[ -z "$user" || -z "$host" || -z "$port" ]]; then
  echo "UNKNOWN:Usage: $0 <replication_user> <host> <port>"
  exit $STATE_UNKNOWN
fi

# The user name is handed to psql as a variable and quoted by psql itself
# (:'repl_user') instead of being pasted into the SQL text. psql does not
# interpolate variables in "-c" commands, hence the query is fed on stdin.
# ON_ERROR_STOP makes psql return a non-zero status when the query fails,
# which it would not do by default for a script read from stdin.
# When several connections use the same user, only the worst lag is kept.
lag=$(psql -X -q -A -t -h "$host" -p "$port" \
  -v ON_ERROR_STOP=1 -v repl_user="$user" 2>/dev/null <<'SQL'
SELECT COALESCE(EXTRACT(EPOCH FROM replay_lag), 0)
  FROM pg_stat_replication
 WHERE usename = :'repl_user'
 ORDER BY 1 DESC
 LIMIT 1;
SQL
)
exit_code=$?

if [[ $exit_code -ne 0 ]]; then
  echo "UNKNOWN:Impossible to run psql command"
  exit $STATE_UNKNOWN
elif [[ -z "$lag" ]]; then
  echo "UNKNOWN:No replication found for $user"
  exit $STATE_UNKNOWN
fi

output="Replication lag for $user is ${lag}s"

# Round to whole seconds for the integer comparisons below. LC_ALL=C is scoped
# to printf so that the decimal point is parsed whatever the ambient locale is.
if ! lag_seconds=$(LC_ALL=C printf "%.*f" 0 "$lag" 2>/dev/null); then
  echo "UNKNOWN:Unexpected replication lag value '$lag' for $user"
  exit $STATE_UNKNOWN
fi

if [[ $lag_seconds -lt 5 ]]; then
  echo "OK:$output"
  exit $STATE_OK
elif [[ $lag_seconds -lt 10 ]]; then
  echo "WARNING:$output"
  exit $STATE_WARNING
else
  echo "CRITICAL:$output"
  exit $STATE_CRITICAL
fi
diff --git a/modules/profile/manifests/monitoring/commands.pp b/modules/profile/manifests/monitoring/commands.pp index a25f7b3..4ccc816 100644 --- a/modules/profile/manifests/monitoring/commands.pp +++ b/modules/profile/manifests/monitoring/commands.pp | |||
@@ -3,25 +3,24 @@ class profile::monitoring::commands inherits profile::monitoring { | |||
3 | 3 | ||
4 | file { "/etc/naemon/monitoring-plugins": | 4 | file { "/etc/naemon/monitoring-plugins": |
5 | ensure => "directory", | 5 | ensure => "directory", |
6 | owner => "naemon", | 6 | owner => "root", |
7 | group => "naemon", | ||
8 | mode => "0700", | ||
9 | } | ||
10 | |||
11 | file { "/etc/naemon/monitoring-plugins/check_command": | ||
12 | ensure => "present", | ||
13 | owner => "naemon", | ||
14 | group => "naemon", | 7 | group => "naemon", |
15 | mode => "0700", | 8 | mode => "0755", |
16 | source => "puppet:///modules/profile/monitoring/check_command", | ||
17 | } | 9 | } |
18 | 10 | ||
19 | file { "/etc/naemon/monitoring-plugins/check_md_raid": | 11 | [ |
20 | ensure => "present", | 12 | "check_command", |
21 | owner => "naemon", | 13 | "check_md_raid", |
22 | group => "naemon", | 14 | "check_postgres_replication", |
23 | mode => "0700", | 15 | "check_last_file_date", |
24 | source => "puppet:///modules/profile/monitoring/check_md_raid", | 16 | ].each |$file| { |
17 | file { "/etc/naemon/monitoring-plugins/$file": | ||
18 | ensure => "present", | ||
19 | owner => "root", | ||
20 | group => "naemon", | ||
21 | mode => "0755", | ||
22 | source => "puppet:///modules/profile/monitoring/$file", | ||
23 | } | ||
25 | } | 24 | } |
26 | 25 | ||
27 | Nagios_command { | 26 | Nagios_command { |
@@ -48,10 +47,17 @@ class profile::monitoring::commands inherits profile::monitoring { | |||
48 | "check_ntp": | 47 | "check_ntp": |
49 | command_line => '$USER1$/check_ntp_time -H 0.arch.pool.ntp.org'; | 48 | command_line => '$USER1$/check_ntp_time -H 0.arch.pool.ntp.org'; |
50 | "check_md_raid": | 49 | "check_md_raid": |
51 | command_line => '$USER2$/check_md_raid'; | 50 | command_line => '$USER2$/check_md_raid', |
51 | require => File["/etc/naemon/monitoring-plugins/check_md_raid"]; | ||
52 | "check_command_output": | 52 | "check_command_output": |
53 | command_line => '$USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$', | 53 | command_line => '$USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$', |
54 | require => File["/etc/naemon/monitoring-plugins/check_command"]; | 54 | require => File["/etc/naemon/monitoring-plugins/check_command"]; |
55 | "check_postgresql_replication": | ||
56 | command_line => '/usr/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$"', | ||
57 | require => File["/etc/naemon/monitoring-plugins/check_postgres_replication"]; | ||
58 | "check_last_file_date": | ||
59 | command_line => '$USER2$/check_last_file_date "$ARG1$" "$ARG2$" "$ARG3$"', | ||
60 | require => File["/etc/naemon/monitoring-plugins/check_last_file_date"], | ||
55 | } | 61 | } |
56 | 62 | ||
57 | unless empty($naemon_url) { | 63 | unless empty($naemon_url) { |
diff --git a/modules/profile/manifests/monitoring/local_service.pp b/modules/profile/manifests/monitoring/local_service.pp index 3b39d1f..0caf72e 100644 --- a/modules/profile/manifests/monitoring/local_service.pp +++ b/modules/profile/manifests/monitoring/local_service.pp | |||
@@ -1,4 +1,5 @@ | |||
1 | define profile::monitoring::local_service ( | 1 | define profile::monitoring::local_service ( |
2 | Optional[Hash] $sudos = {}, | ||
2 | Optional[Hash] $common = {}, | 3 | Optional[Hash] $common = {}, |
3 | Optional[Hash] $master = {}, | 4 | Optional[Hash] $master = {}, |
4 | Optional[Hash] $local = {}, | 5 | Optional[Hash] $local = {}, |
@@ -6,6 +7,15 @@ define profile::monitoring::local_service ( | |||
6 | $service_description = $title | 7 | $service_description = $title |
7 | $real_hostname = lookup("base_installation::real_hostname") | 8 | $real_hostname = lookup("base_installation::real_hostname") |
8 | 9 | ||
10 | Nagios_service { | ||
11 | ensure => "present", | ||
12 | owner => "naemon", | ||
13 | group => "naemon", | ||
14 | notify => Service["naemon"], | ||
15 | before => Service["naemon"], | ||
16 | require => File["/etc/naemon"], | ||
17 | } | ||
18 | |||
9 | $service_generic = { | 19 | $service_generic = { |
10 | active_checks_enabled => "1", | 20 | active_checks_enabled => "1", |
11 | check_freshness => "0", | 21 | check_freshness => "0", |
@@ -35,6 +45,12 @@ define profile::monitoring::local_service ( | |||
35 | retry_interval => "1", | 45 | retry_interval => "1", |
36 | }) | 46 | }) |
37 | 47 | ||
48 | $sudos.each |$sudo_name, $content| { | ||
49 | sudo::conf { $sudo_name: | ||
50 | content => $content, | ||
51 | before => Nagios_service[$service_description], | ||
52 | } | ||
53 | } | ||
38 | 54 | ||
39 | [true, false].each |$services_for_master| { | 55 | [true, false].each |$services_for_master| { |
40 | if $services_for_master { | 56 | if $services_for_master { |
diff --git a/modules/profile/manifests/monitoring/services.pp b/modules/profile/manifests/monitoring/services.pp index 6e59ab1..b20a352 100644 --- a/modules/profile/manifests/monitoring/services.pp +++ b/modules/profile/manifests/monitoring/services.pp | |||
@@ -1,21 +1,5 @@ | |||
1 | class profile::monitoring::services { | 1 | class profile::monitoring::services { |
2 | 2 | ||
3 | Nagios_service { | ||
4 | ensure => "present", | ||
5 | owner => "naemon", | ||
6 | group => "naemon", | ||
7 | notify => Service["naemon"], | ||
8 | before => Service["naemon"], | ||
9 | require => File["/etc/naemon"], | ||
10 | } | ||
11 | |||
12 | sudo::conf { | ||
13 | default: | ||
14 | sudo_file_name => "naemon"; | ||
15 | 'naemon-fail2ban': | ||
16 | content => "naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping"; | ||
17 | } | ||
18 | |||
19 | profile::monitoring::local_service { | 3 | profile::monitoring::local_service { |
20 | "Size on root partition": | 4 | "Size on root partition": |
21 | local => { | 5 | local => { |
@@ -34,9 +18,11 @@ class profile::monitoring::services { | |||
34 | check_command => "check_local_swap!20!10", | 18 | check_command => "check_local_swap!20!10", |
35 | }; | 19 | }; |
36 | "fail2ban is active": | 20 | "fail2ban is active": |
37 | local => { | 21 | sudos => { |
22 | "naemon-fail2ban" => "naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping", | ||
23 | }, | ||
24 | local => { | ||
38 | check_command => "check_command_output!fail2ban-client ping!pong!-r root", | 25 | check_command => "check_command_output!fail2ban-client ping!pong!-r root", |
39 | require => Sudo::Conf["naemon-fail2ban"], | ||
40 | }; | 26 | }; |
41 | "NTP is activated and working": | 27 | "NTP is activated and working": |
42 | local => { | 28 | local => { |
@@ -50,4 +36,6 @@ class profile::monitoring::services { | |||
50 | check_command => "check_md_raid", | 36 | check_command => "check_md_raid", |
51 | }; | 37 | }; |
52 | } | 38 | } |
39 | |||
40 | Profile::Monitoring::Local_service <| |> | ||
53 | } | 41 | } |
diff --git a/modules/profile/manifests/postgresql.pp b/modules/profile/manifests/postgresql.pp index 97ce572..edd6ea6 100644 --- a/modules/profile/manifests/postgresql.pp +++ b/modules/profile/manifests/postgresql.pp | |||
@@ -28,5 +28,13 @@ class profile::postgresql ( | |||
28 | 28 | ||
29 | profile::postgresql::base_pg_hba_rules { "default": } | 29 | profile::postgresql::base_pg_hba_rules { "default": } |
30 | 30 | ||
# Virtual monitoring service: runs the namespace listing query through psql as
# the postgres user (via the check_command_output command) with "public" as
# the expected output argument. The sudoers entry lets naemon run exactly that
# psql command line as postgres without a password.
@profile::monitoring::local_service { "Databases are present in postgresql":
  local => {
    check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace'!public!-r postgres",
  },
  sudos => {
    "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace",
  },
}
31 | } | 39 | } |
32 | 40 | ||
diff --git a/modules/profile/manifests/postgresql/backup_dump.pp b/modules/profile/manifests/postgresql/backup_dump.pp index 53fb20e..5e469c5 100644 --- a/modules/profile/manifests/postgresql/backup_dump.pp +++ b/modules/profile/manifests/postgresql/backup_dump.pp | |||
@@ -57,4 +57,13 @@ define profile::postgresql::backup_dump ( | |||
57 | }, | 57 | }, |
58 | ] | 58 | ] |
59 | } | 59 | } |
60 | |||
# Virtual monitoring service: checks with check_last_file_date that the newest
# entry of the dump folder is less than 10 hours old. The listing is done as
# $pg_user, so naemon gets a sudoers entry for the exact find command line that
# check_last_file_date runs (the arguments must stay in sync with that script).
# NOTE(review): the sudo::conf name is the same for every instance of this
# define -- confirm it is never declared twice on the same node.
@profile::monitoring::local_service { "Last postgresql dump in $pg_backup_path is not too old":
  sudos => {
    "naemon-postgresql-dumps" => "naemon ALL=($pg_user) NOPASSWD: /usr/bin/find $pg_backup_path -mindepth 1 -maxdepth 1 -printf %T@\\n",
  },
  local => {
    check_command => "check_last_file_date!$pg_backup_path!10!$pg_user",
  },
}
60 | } | 69 | } |
diff --git a/modules/profile/manifests/postgresql/backup_pgbouncer.pp b/modules/profile/manifests/postgresql/backup_pgbouncer.pp index 45b8ed5..c82eefd 100644 --- a/modules/profile/manifests/postgresql/backup_pgbouncer.pp +++ b/modules/profile/manifests/postgresql/backup_pgbouncer.pp | |||
@@ -48,6 +48,16 @@ define profile::postgresql::backup_pgbouncer ( | |||
48 | content => "${pg_infos[pgbouncer_dbname]} = host=$pg_path$pg_port user=${pg_infos[dbuser]} dbname=${pg_infos[dbname]}", | 48 | content => "${pg_infos[pgbouncer_dbname]} = host=$pg_path$pg_port user=${pg_infos[dbuser]} dbname=${pg_infos[dbname]}", |
49 | } | 49 | } |
50 | 50 | ||
51 | # FIXME: current pam configuration requires password for postgres | ||
52 | # @profile::monitoring::local_service { "Database ${pg_infos[pgbouncer_dbname]} is available in pgbouncer": | ||
53 | # sudos => { | ||
54 | # "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace ${pg_infos[pgbouncer_dbname]}" | ||
55 | # }, | ||
56 | # local => { | ||
57 | # check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace' ${pg_infos[pgbouncer_dbname]}!public!-r postgres", | ||
58 | # } | ||
59 | # } | ||
60 | |||
51 | # pg_hba for accessed cluster | 61 | # pg_hba for accessed cluster |
52 | postgresql::server::pg_hba_rule { "$pg_backup_host - local access as ${pg_infos[dbuser]} user": | 62 | postgresql::server::pg_hba_rule { "$pg_backup_host - local access as ${pg_infos[dbuser]} user": |
53 | description => "Allow local access to ${pg_infos[dbuser]} user", | 63 | description => "Allow local access to ${pg_infos[dbuser]} user", |
diff --git a/modules/profile/manifests/postgresql/master.pp b/modules/profile/manifests/postgresql/master.pp index 02315a6..e775eb4 100644 --- a/modules/profile/manifests/postgresql/master.pp +++ b/modules/profile/manifests/postgresql/master.pp | |||
@@ -59,5 +59,15 @@ define profile::postgresql::master ( | |||
59 | handle_slot => true, | 59 | handle_slot => true, |
60 | add_self_role => true, | 60 | add_self_role => true, |
61 | } | 61 | } |
62 | |||
# Virtual monitoring service: checks the replication lag of $backup_host with
# check_postgres_replication, run as postgres through sudo.
# sudo only accepts a command whose arguments match the sudoers rule exactly,
# so the socket directory and port given in check_command must be written
# exactly as in the sudoers entry (no trailing slash on the directory).
@profile::monitoring::local_service { "Postgresql replication for $backup_host is up to date":
  sudos => {
    "naemon-postgresql-replication-$backup_host" => "naemon ALL=(postgres) NOPASSWD: /etc/naemon/monitoring-plugins/check_postgres_replication $backup_host /run/postgresql 5432",
  },
  local => {
    check_command => "check_postgresql_replication!$backup_host!/run/postgresql!5432",
  },
}
62 | } | 72 | } |
63 | } | 73 | } |
diff --git a/modules/profile/manifests/postgresql/ssl.pp b/modules/profile/manifests/postgresql/ssl.pp index dc56c0b..9b0a95c 100644 --- a/modules/profile/manifests/postgresql/ssl.pp +++ b/modules/profile/manifests/postgresql/ssl.pp | |||
@@ -79,4 +79,5 @@ define profile::postgresql::ssl ( | |||
79 | } | 79 | } |
80 | } | 80 | } |
81 | 81 | ||
82 | # FIXME: add monitoring for ssl | ||
82 | } | 83 | } |