--- /dev/null
+#!/bin/bash
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+base_path=$1
+hours=$2
+as_user=$3
+
+if [ -z "$as_user" ]; then
+ last_date=$(find $base_path -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort | tail -n 1)
+else
+ last_date=$(sudo -u "$as_user" find $base_path -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort | tail -n 1)
+fi
+
+if [ -z "$last_date" ]; then
+ echo "UNKNOWN: Could not read folder"
+ exit $STATE_UNKNOWN
+else
+ LC_ALL=C last_date=$(printf "%.*f" 0 $last_date)
+ min_date=$(date -d "$hours hours ago" "+%s")
+ if [ "$min_date" -lt "$last_date" ]; then
+ echo "OK: Last backup $(date -d @$last_date)"
+ exit $STATE_OK
+ else
+ echo "CRITICAL: Last backup $(date -d @$last_date)"
+ exit $STATE_CRITICAL
+ fi
+fi
--- /dev/null
+#!/bin/bash
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+user=$1
+host=$2
+port=$3
+
+lag=$(psql -h $host -p $port -A -t -c "SELECT COALESCE(EXTRACT(EPOCH FROM replay_lag),0) FROM pg_stat_replication WHERE usename='$user'" 2>/dev/null)
+exit_code=$?
+
+if [[ $exit_code -ne 0 ]]; then
+ echo "UNKNOWN:Impossible to run psql command"
+ exit $STATE_UNKNOWN
+elif [[ -z "$lag" ]]; then
+ echo "UNKNOWN:No replication found for $user"
+ exit $STATE_UNKNOWN
+else
+ output="Replication lag for $user is ${lag}s"
+ LC_ALL=C lag=$(printf "%.*f" 0 $lag)
+
+ if [[ $lag -lt 5 ]]; then
+ echo "OK:$output"
+ exit $STATE_OK
+ elif [[ $lag -lt 10 ]]; then
+ echo "WARNING:$output"
+ exit $STATE_WARNING
+ else
+ echo "CRITICAL:$output"
+ exit $STATE_CRITICAL
+ fi
+fi
+ # Directory that receives the monitoring plugin scripts declared below.
file { "/etc/naemon/monitoring-plugins":
ensure => "directory",
- owner => "naemon",
- group => "naemon",
- mode => "0700",
- }
-
- file { "/etc/naemon/monitoring-plugins/check_command":
- ensure => "present",
- owner => "naemon",
+ owner => "root",
group => "naemon",
- mode => "0700",
- source => "puppet:///modules/profile/monitoring/check_command",
+ mode => "0755",
}
- file { "/etc/naemon/monitoring-plugins/check_md_raid":
- ensure => "present",
- owner => "naemon",
- group => "naemon",
- mode => "0700",
- source => "puppet:///modules/profile/monitoring/check_md_raid",
+ # One file resource per plugin name: each script is copied from the profile
+ # module's monitoring files, owned by root:naemon with mode 0755.
+ [
+ "check_command",
+ "check_md_raid",
+ "check_postgres_replication",
+ "check_last_file_date",
+ ].each |$file| {
+ file { "/etc/naemon/monitoring-plugins/$file":
+ ensure => "present",
+ owner => "root",
+ group => "naemon",
+ mode => "0755",
+ source => "puppet:///modules/profile/monitoring/$file",
+ }
}
Nagios_command {
"check_ntp":
command_line => '$USER1$/check_ntp_time -H 0.arch.pool.ntp.org';
"check_md_raid":
- command_line => '$USER2$/check_md_raid';
+ command_line => '$USER2$/check_md_raid',
+ require => File["/etc/naemon/monitoring-plugins/check_md_raid"];
"check_command_output":
command_line => '$USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$',
require => File["/etc/naemon/monitoring-plugins/check_command"];
+ # Runs the replication plugin through sudo as the postgres user; the three
+ # arguments are the script's user, host and port parameters.
+ "check_postgresql_replication":
+ command_line => '/usr/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$"',
+ require => File["/etc/naemon/monitoring-plugins/check_postgres_replication"];
+ # The three arguments are the script's base path, hours and optional user.
+ "check_last_file_date":
+ command_line => '$USER2$/check_last_file_date "$ARG1$" "$ARG2$" "$ARG3$"',
+ require => File["/etc/naemon/monitoring-plugins/check_last_file_date"],
}
unless empty($naemon_url) {
define profile::monitoring::local_service (
+ Optional[Hash] $sudos = {},
Optional[Hash] $common = {},
Optional[Hash] $master = {},
Optional[Hash] $local = {},
$service_description = $title
$real_hostname = lookup("base_installation::real_hostname")
+ # Default attributes for Nagios_service resources: files owned by naemon,
+ # applied after /etc/naemon exists and before the naemon service, which is
+ # also notified of changes.
+ Nagios_service {
+ ensure => "present",
+ owner => "naemon",
+ group => "naemon",
+ notify => Service["naemon"],
+ before => Service["naemon"],
+ require => File["/etc/naemon"],
+ }
+
$service_generic = {
active_checks_enabled => "1",
check_freshness => "0",
retry_interval => "1",
})
+ # Declare one sudo::conf per entry of $sudos (name => content), ordered
+ # before the Nagios_service titled after this service description.
+ $sudos.each |$sudo_name, $content| {
+ sudo::conf { $sudo_name:
+ content => $content,
+ before => Nagios_service[$service_description],
+ }
+ }
[true, false].each |$services_for_master| {
if $services_for_master {
class profile::monitoring::services {
- Nagios_service {
- ensure => "present",
- owner => "naemon",
- group => "naemon",
- notify => Service["naemon"],
- before => Service["naemon"],
- require => File["/etc/naemon"],
- }
-
- sudo::conf {
- default:
- sudo_file_name => "naemon";
- 'naemon-fail2ban':
- content => "naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping";
- }
-
profile::monitoring::local_service {
"Size on root partition":
local => {
check_command => "check_local_swap!20!10",
};
"fail2ban is active":
- local => {
+ # sudoers entry this check needs, handed to the define through its
+ # sudos parameter.
+ sudos => {
+ "naemon-fail2ban" => "naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping",
+ },
+ local => {
check_command => "check_command_output!fail2ban-client ping!pong!-r root",
- require => Sudo::Conf["naemon-fail2ban"],
};
"NTP is activated and working":
local => {
check_command => "check_md_raid",
};
}
+
+ # Collector: realizes the virtual (@-prefixed) local_service resources.
+ Profile::Monitoring::Local_service <| |>
}
profile::postgresql::base_pg_hba_rules { "default": }
+ # Virtual service: runs psql as postgres and expects "public" in the list of
+ # namespaces; the sudoers entry allows naemon to run exactly that query.
+ @profile::monitoring::local_service { "Databases are present in postgresql":
+   local => {
+     check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace'!public!-r postgres",
+   },
+   sudos => {
+     "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace",
+   },
+ }
}
},
]
}
+
+ # Virtual service: checks the newest entry of $pg_backup_path with
+ # check_last_file_date, using a 10 hour limit and listing the folder as
+ # $pg_user. The sudoers entry allows naemon to run that find command.
+ @profile::monitoring::local_service { "Last postgresql dump in $pg_backup_path is not too old":
+   sudos => {
+     "naemon-postgresql-dumps" => "naemon ALL=($pg_user) NOPASSWD: /usr/bin/find $pg_backup_path -mindepth 1 -maxdepth 1 -printf %T@\\n",
+   },
+   local => {
+     check_command => "check_last_file_date!$pg_backup_path!10!$pg_user",
+   },
+ }
}
content => "${pg_infos[pgbouncer_dbname]} = host=$pg_path$pg_port user=${pg_infos[dbuser]} dbname=${pg_infos[dbname]}",
}
+ # FIXME: the current PAM configuration requires a password for the postgres user, so this check stays disabled
+ # @profile::monitoring::local_service { "Database ${pg_infos[pgbouncer_dbname]} is available in pgbouncer":
+ # sudos => {
+ # "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace ${pg_infos[pgbouncer_dbname]}"
+ # },
+ # local => {
+ # check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace' ${pg_infos[pgbouncer_dbname]}!public!-r postgres",
+ # }
+ # }
+
# pg_hba for accessed cluster
postgresql::server::pg_hba_rule { "$pg_backup_host - local access as ${pg_infos[dbuser]} user":
description => "Allow local access to ${pg_infos[dbuser]} user",
handle_slot => true,
add_self_role => true,
}
+
+ # Virtual service: replication lag check for $backup_host, run through sudo
+ # as postgres. sudo compares the command arguments with the sudoers entry,
+ # so the socket directory is written identically (no trailing slash) in the
+ # sudoers rule and in the check command.
+ @profile::monitoring::local_service { "Postgresql replication for $backup_host is up to date":
+   sudos => {
+     "naemon-postgresql-replication-$backup_host" => "naemon ALL=(postgres) NOPASSWD: /etc/naemon/monitoring-plugins/check_postgres_replication $backup_host /run/postgresql 5432",
+   },
+   local => {
+     check_command => "check_postgresql_replication!$backup_host!/run/postgresql!5432",
+   },
+ }
}
}
}
}
+ # FIXME: add monitoring for ssl
}