From b5305b5cad5cbb0a2c072b29f2d4dc05126c39d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Isma=C3=ABl=20Bouya?=
Date: Sun, 8 Jul 2018 21:51:30 +0200
Subject: Add postgresql monitoring

---
Review notes (free-form text placed between the "---" line and the first
"diff --git" line):
 * backup_dump.pp: added the comma that was missing between the `sudos`
   and `local` attributes of the local_service resource.
 * master.pp: the check_command now passes "/run/postgresql" without a
   trailing slash, so the arguments given to sudo are the same as the ones
   listed in the sudoers rule declared just above it.
 * check_last_file_date / check_postgres_replication: the positional
   parameters are quoted where they are expanded on the find/psql command
   lines.
 * Hunk line counts are unchanged by these fixes. Leading whitespace and
   blank context lines were rebuilt from a whitespace-collapsed copy of the
   patch; verify them against the tree (or apply with
   `git apply --ignore-whitespace`).

 .../profile/files/monitoring/check_last_file_date  | 31 +++++++++++++++++
 .../files/monitoring/check_postgres_replication    | 35 +++++++++++++++++++
 modules/profile/manifests/monitoring/commands.pp   | 40 +++++++++++++---------
 .../profile/manifests/monitoring/local_service.pp  | 16 +++++++++
 modules/profile/manifests/monitoring/services.pp   | 24 ++++---------
 modules/profile/manifests/postgresql.pp            |  8 +++++
 .../profile/manifests/postgresql/backup_dump.pp    |  9 +++++
 .../manifests/postgresql/backup_pgbouncer.pp       | 10 ++++++
 modules/profile/manifests/postgresql/master.pp     | 10 ++++++
 modules/profile/manifests/postgresql/ssl.pp        |  1 +
 10 files changed, 149 insertions(+), 35 deletions(-)
 create mode 100644 modules/profile/files/monitoring/check_last_file_date
 create mode 100644 modules/profile/files/monitoring/check_postgres_replication

(limited to 'modules/profile')

diff --git a/modules/profile/files/monitoring/check_last_file_date b/modules/profile/files/monitoring/check_last_file_date
new file mode 100644
index 0000000..8eabb57
--- /dev/null
+++ b/modules/profile/files/monitoring/check_last_file_date
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+base_path=$1  # directory whose direct entries are inspected
+hours=$2      # maximum accepted age of the newest entry, in hours
+as_user=$3    # optional: when set, find is run through "sudo -u <user>"
+
+if [ -z "$as_user" ]; then
+  last_date=$(find "$base_path" -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort | tail -n 1)
+else
+  last_date=$(sudo -u "$as_user" find "$base_path" -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort | tail -n 1)
+fi
+
+if [ -z "$last_date" ]; then  # find printed nothing
+  echo "UNKNOWN: Could not read folder"
+  exit $STATE_UNKNOWN
+else
+  LC_ALL=C last_date=$(printf "%.*f" 0 $last_date)  # drop the fractional part of the timestamp
+  min_date=$(date -d "$hours hours ago" "+%s")
+  if [ "$min_date" -lt "$last_date" ]; then  # newest entry is more recent than the limit
+    echo "OK: Last backup $(date -d @$last_date)"
+    exit $STATE_OK
+  else
+    echo "CRITICAL: Last backup $(date -d @$last_date)"
+    exit $STATE_CRITICAL
+  fi
+fi
diff --git a/modules/profile/files/monitoring/check_postgres_replication b/modules/profile/files/monitoring/check_postgres_replication
new file mode 100644
index 0000000..163c68a
--- /dev/null
+++ b/modules/profile/files/monitoring/check_postgres_replication
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+user=$1  # role compared with pg_stat_replication.usename
+host=$2  # given to psql -h
+port=$3  # given to psql -p
+
+lag=$(psql -h "$host" -p "$port" -A -t -c "SELECT COALESCE(EXTRACT(EPOCH FROM replay_lag),0) FROM pg_stat_replication WHERE usename='$user'" 2>/dev/null)
+exit_code=$?
+
+if [[ $exit_code -ne 0 ]]; then
+  echo "UNKNOWN:Impossible to run psql command"
+  exit $STATE_UNKNOWN
+elif [[ -z "$lag" ]]; then  # the query returned no row for this user
+  echo "UNKNOWN:No replication found for $user"
+  exit $STATE_UNKNOWN
+else
+  output="Replication lag for $user is ${lag}s"
+  LC_ALL=C lag=$(printf "%.*f" 0 $lag)  # round to whole seconds for the integer tests below
+
+  if [[ $lag -lt 5 ]]; then  # below 5 seconds
+    echo "OK:$output"
+    exit $STATE_OK
+  elif [[ $lag -lt 10 ]]; then  # from 5 up to (not including) 10 seconds
+    echo "WARNING:$output"
+    exit $STATE_WARNING
+  else
+    echo "CRITICAL:$output"
+    exit $STATE_CRITICAL
+  fi
+fi
diff --git a/modules/profile/manifests/monitoring/commands.pp b/modules/profile/manifests/monitoring/commands.pp
index a25f7b3..4ccc816 100644
--- a/modules/profile/manifests/monitoring/commands.pp
+++ b/modules/profile/manifests/monitoring/commands.pp
@@ -3,25 +3,24 @@ class profile::monitoring::commands inherits profile::monitoring {

   file { "/etc/naemon/monitoring-plugins":
     ensure => "directory",
-    owner  => "naemon",
-    group  => "naemon",
-    mode   => "0700",
-  }
-
-  file { "/etc/naemon/monitoring-plugins/check_command":
-    ensure => "present",
-    owner  => "naemon",
+    owner  => "root",
     group  => "naemon",
-    mode   => "0700",
-    source => "puppet:///modules/profile/monitoring/check_command",
+    mode   => "0755",
   }

-  file { "/etc/naemon/monitoring-plugins/check_md_raid":
-    ensure => "present",
-    owner  => "naemon",
-    group  => "naemon",
-    mode   => "0700",
-    source => "puppet:///modules/profile/monitoring/check_md_raid",
+  [
+    "check_command",
+    "check_md_raid",
+    "check_postgres_replication",
+    "check_last_file_date",
+  ].each |$file| {
+    file { "/etc/naemon/monitoring-plugins/$file":
+      ensure => "present",
+      owner  => "root",
+      group  => "naemon",
+      mode   => "0755",
+      source => "puppet:///modules/profile/monitoring/$file",
+    }
   }

   Nagios_command {
@@ -48,10 +47,17 @@ class profile::monitoring::commands inherits profile::monitoring {
     "check_ntp":
       command_line => '$USER1$/check_ntp_time -H 0.arch.pool.ntp.org';
     "check_md_raid":
-      command_line => '$USER2$/check_md_raid';
+      command_line => '$USER2$/check_md_raid',
+      require      => File["/etc/naemon/monitoring-plugins/check_md_raid"];
     "check_command_output":
       command_line => '$USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$',
       require      => File["/etc/naemon/monitoring-plugins/check_command"];
+    "check_postgresql_replication":
+      command_line => '/usr/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$"',
+      require      => File["/etc/naemon/monitoring-plugins/check_postgres_replication"];
+    "check_last_file_date":
+      command_line => '$USER2$/check_last_file_date "$ARG1$" "$ARG2$" "$ARG3$"',
+      require      => File["/etc/naemon/monitoring-plugins/check_last_file_date"],
   }

   unless empty($naemon_url) {
diff --git a/modules/profile/manifests/monitoring/local_service.pp b/modules/profile/manifests/monitoring/local_service.pp
index 3b39d1f..0caf72e 100644
--- a/modules/profile/manifests/monitoring/local_service.pp
+++ b/modules/profile/manifests/monitoring/local_service.pp
@@ -1,4 +1,5 @@
 define profile::monitoring::local_service (
+  Optional[Hash] $sudos  = {}, # sudo::conf name => sudoers content needed by the check
   Optional[Hash] $common = {},
   Optional[Hash] $master = {},
   Optional[Hash] $local  = {},
@@ -6,6 +7,15 @@ define profile::monitoring::local_service (
   $service_description = $title
   $real_hostname = lookup("base_installation::real_hostname")

+  Nagios_service {
+    ensure  => "present",
+    owner   => "naemon",
+    group   => "naemon",
+    notify  => Service["naemon"],
+    before  => Service["naemon"],
+    require => File["/etc/naemon"],
+  }
+
   $service_generic = {
     active_checks_enabled => "1",
     check_freshness => "0",
@@ -35,6 +45,12 @@ define profile::monitoring::local_service (
     retry_interval => "1",
   })

+  $sudos.each |$sudo_name, $content| {
+    sudo::conf { $sudo_name:
+      content => $content,
+      before  => Nagios_service[$service_description],
+    }
+  }

   [true, false].each |$services_for_master| {
     if $services_for_master {
diff --git a/modules/profile/manifests/monitoring/services.pp b/modules/profile/manifests/monitoring/services.pp
index 6e59ab1..b20a352 100644
--- a/modules/profile/manifests/monitoring/services.pp
+++ b/modules/profile/manifests/monitoring/services.pp
@@ -1,21 +1,5 @@
 class profile::monitoring::services {

-  Nagios_service {
-    ensure  => "present",
-    owner   => "naemon",
-    group   => "naemon",
-    notify  => Service["naemon"],
-    before  => Service["naemon"],
-    require => File["/etc/naemon"],
-  }
-
-  sudo::conf {
-    default:
-      sudo_file_name => "naemon";
-    'naemon-fail2ban':
-      content => "naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping";
-  }
-
   profile::monitoring::local_service {
     "Size on root partition":
       local => {
@@ -34,9 +18,11 @@ class profile::monitoring::services {
         check_command => "check_local_swap!20!10",
       };
     "fail2ban is active":
-      local => {
+      sudos => {
+        "naemon-fail2ban" => "naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping",
+      },
+      local => {
         check_command => "check_command_output!fail2ban-client ping!pong!-r root",
-        require       => Sudo::Conf["naemon-fail2ban"],
       };
     "NTP is activated and working":
       local => {
@@ -50,4 +36,6 @@ class profile::monitoring::services {
         check_command => "check_md_raid",
       };
   }
+
+  Profile::Monitoring::Local_service <| |>
 }
diff --git a/modules/profile/manifests/postgresql.pp b/modules/profile/manifests/postgresql.pp
index 97ce572..edd6ea6 100644
--- a/modules/profile/manifests/postgresql.pp
+++ b/modules/profile/manifests/postgresql.pp
@@ -28,5 +28,13 @@ class profile::postgresql (

   profile::postgresql::base_pg_hba_rules { "default": }

+  @profile::monitoring::local_service { "Databases are present in postgresql":
+    sudos => {
+      "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace"
+    },
+    local => {
+      check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace'!public!-r postgres",
+    }
+  }
 }

diff --git a/modules/profile/manifests/postgresql/backup_dump.pp b/modules/profile/manifests/postgresql/backup_dump.pp
index 53fb20e..5e469c5 100644
--- a/modules/profile/manifests/postgresql/backup_dump.pp
+++ b/modules/profile/manifests/postgresql/backup_dump.pp
@@ -57,4 +57,13 @@ define profile::postgresql::backup_dump (
       },
     ]
   }
+
+  @profile::monitoring::local_service { "Last postgresql dump in $pg_backup_path is not too old":
+    sudos => {
+      "naemon-postgresql-dumps" => "naemon ALL=($pg_user) NOPASSWD: /usr/bin/find $pg_backup_path -mindepth 1 -maxdepth 1 -printf %T@\\n",
+    },
+    local => {
+      check_command => "check_last_file_date!$pg_backup_path!10!$pg_user"
+    }
+  }
 }
diff --git a/modules/profile/manifests/postgresql/backup_pgbouncer.pp b/modules/profile/manifests/postgresql/backup_pgbouncer.pp
index 45b8ed5..c82eefd 100644
--- a/modules/profile/manifests/postgresql/backup_pgbouncer.pp
+++ b/modules/profile/manifests/postgresql/backup_pgbouncer.pp
@@ -48,6 +48,16 @@ define profile::postgresql::backup_pgbouncer (
     content => "${pg_infos[pgbouncer_dbname]} = host=$pg_path$pg_port user=${pg_infos[dbuser]} dbname=${pg_infos[dbname]}",
   }

+  # FIXME: current pam configuration requires password for postgres
+  # @profile::monitoring::local_service { "Database ${pg_infos[pgbouncer_dbname]} is available in pgbouncer":
+  #   sudos => {
+  #     "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace ${pg_infos[pgbouncer_dbname]}"
+  #   },
+  #   local => {
+  #     check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace' ${pg_infos[pgbouncer_dbname]}!public!-r postgres",
+  #   }
+  # }
+
   # pg_hba for accessed cluster
   postgresql::server::pg_hba_rule { "$pg_backup_host - local access as ${pg_infos[dbuser]} user":
     description => "Allow local access to ${pg_infos[dbuser]} user",
diff --git a/modules/profile/manifests/postgresql/master.pp b/modules/profile/manifests/postgresql/master.pp
index 02315a6..e775eb4 100644
--- a/modules/profile/manifests/postgresql/master.pp
+++ b/modules/profile/manifests/postgresql/master.pp
@@ -59,5 +59,15 @@ define profile::postgresql::master (
       handle_slot   => true,
       add_self_role => true,
     }
+
+    @profile::monitoring::local_service { "Postgresql replication for $backup_host is up to date":
+      sudos => {
+        "naemon-postgresql-replication-$backup_host" => "naemon ALL=(postgres) NOPASSWD: /etc/naemon/monitoring-plugins/check_postgres_replication $backup_host /run/postgresql 5432"
+
+      },
+      local => {
+        check_command => "check_postgresql_replication!$backup_host!/run/postgresql!5432", # same three arguments as in the sudoers rule above
+      }
+    }
   }
 }
diff --git a/modules/profile/manifests/postgresql/ssl.pp b/modules/profile/manifests/postgresql/ssl.pp
index dc56c0b..9b0a95c 100644
--- a/modules/profile/manifests/postgresql/ssl.pp
+++ b/modules/profile/manifests/postgresql/ssl.pp
@@ -79,4 +79,5 @@ define profile::postgresql::ssl (
   }
 }

+  # FIXME: add monitoring for ssl
 }
-- 
cgit v1.2.3