git.immae.eu Git - perso/Immae/Projets/Puppet.git/commitdiff
Add postgresql monitoring
author Ismaël Bouya <ismael.bouya@normalesup.org>
Sun, 8 Jul 2018 19:51:30 +0000 (21:51 +0200)
committer Ismaël Bouya <ismael.bouya@normalesup.org>
Tue, 10 Jul 2018 08:26:39 +0000 (10:26 +0200)
modules/profile/files/monitoring/check_last_file_date [new file with mode: 0644]
modules/profile/files/monitoring/check_postgres_replication [new file with mode: 0644]
modules/profile/manifests/monitoring/commands.pp
modules/profile/manifests/monitoring/local_service.pp
modules/profile/manifests/monitoring/services.pp
modules/profile/manifests/postgresql.pp
modules/profile/manifests/postgresql/backup_dump.pp
modules/profile/manifests/postgresql/backup_pgbouncer.pp
modules/profile/manifests/postgresql/master.pp
modules/profile/manifests/postgresql/ssl.pp

diff --git a/modules/profile/files/monitoring/check_last_file_date b/modules/profile/files/monitoring/check_last_file_date
new file mode 100644 (file)
index 0000000..8eabb57
--- /dev/null
@@ -0,0 +1,31 @@
+#!/bin/bash
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+  
+base_path=$1
+hours=$2
+as_user=$3
+
+if [ -z "$as_user" ]; then
+  last_date=$(find $base_path -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort | tail -n 1)
+else
+  last_date=$(sudo -u "$as_user" find $base_path -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort | tail -n 1)
+fi
+
+if [ -z "$last_date" ]; then
+  echo "UNKNOWN: Could not read folder"
+  exit $STATE_UNKNOWN
+else
+  LC_ALL=C last_date=$(printf "%.*f" 0 $last_date)
+  min_date=$(date -d "$hours hours ago" "+%s")
+  if [ "$min_date" -lt "$last_date" ]; then
+    echo "OK: Last backup $(date -d @$last_date)"
+    exit $STATE_OK
+  else
+    echo "CRITICAL: Last backup $(date -d @$last_date)"
+    exit $STATE_CRITICAL
+  fi
+fi
diff --git a/modules/profile/files/monitoring/check_postgres_replication b/modules/profile/files/monitoring/check_postgres_replication
new file mode 100644 (file)
index 0000000..163c68a
--- /dev/null
@@ -0,0 +1,35 @@
+#!/bin/bash
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+  
+user=$1
+host=$2
+port=$3
+
+lag=$(psql -h $host -p $port -A -t -c "SELECT COALESCE(EXTRACT(EPOCH FROM replay_lag),0) FROM pg_stat_replication WHERE usename='$user'" 2>/dev/null)
+exit_code=$?
+
+if [[ $exit_code -ne 0 ]]; then
+  echo "UNKNOWN:Impossible to run psql command"
+  exit $STATE_UNKNOWN
+elif [[ -z "$lag" ]]; then
+  echo "UNKNOWN:No replication found for $user"
+  exit $STATE_UNKNOWN
+else
+  output="Replication lag for $user is ${lag}s"
+  LC_ALL=C lag=$(printf "%.*f" 0 $lag)
+
+  if [[ $lag -lt 5 ]]; then
+    echo "OK:$output"
+    exit $STATE_OK
+  elif [[ $lag -lt 10 ]]; then
+    echo "WARNING:$output"
+    exit $STATE_WARNING
+  else
+    echo "CRITICAL:$output"
+    exit $STATE_CRITICAL
+  fi
+fi
index a25f7b3e6af655e42da1523e7c1252a53c9ee2ab..4ccc816ccbccb4483d75ef64ded962464ad45e80 100644 (file)
@@ -3,25 +3,24 @@ class profile::monitoring::commands inherits profile::monitoring {
 
   file { "/etc/naemon/monitoring-plugins":
     ensure => "directory",
-    owner  => "naemon",
-    group  => "naemon",
-    mode   => "0700",
-  }
-
-  file { "/etc/naemon/monitoring-plugins/check_command":
-    ensure => "present",
-    owner  => "naemon",
+    owner  => "root",
     group  => "naemon",
-    mode   => "0700",
-    source => "puppet:///modules/profile/monitoring/check_command",
+    mode   => "0755",
   }
 
-  file { "/etc/naemon/monitoring-plugins/check_md_raid":
-    ensure => "present",
-    owner  => "naemon",
-    group  => "naemon",
-    mode   => "0700",
-    source => "puppet:///modules/profile/monitoring/check_md_raid",
+  [
+    "check_command",
+    "check_md_raid",
+    "check_postgres_replication",
+    "check_last_file_date",
+  ].each |$file| {
+    file { "/etc/naemon/monitoring-plugins/$file":
+      ensure => "present",
+      owner  => "root",
+      group  => "naemon",
+      mode   => "0755",
+      source => "puppet:///modules/profile/monitoring/$file",
+    }
   }
 
   Nagios_command {
@@ -48,10 +47,17 @@ class profile::monitoring::commands inherits profile::monitoring {
     "check_ntp":
       command_line => '$USER1$/check_ntp_time -H 0.arch.pool.ntp.org';
     "check_md_raid":
-      command_line => '$USER2$/check_md_raid';
+      command_line => '$USER2$/check_md_raid',
+      require      => File["/etc/naemon/monitoring-plugins/check_md_raid"];
     "check_command_output":
       command_line => '$USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$',
       require      => File["/etc/naemon/monitoring-plugins/check_command"];
+    "check_postgresql_replication":
+      command_line => '/usr/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$"',
+      require      => File["/etc/naemon/monitoring-plugins/check_postgres_replication"];
+    "check_last_file_date":
+      command_line => '$USER2$/check_last_file_date "$ARG1$" "$ARG2$" "$ARG3$"',
+      require      => File["/etc/naemon/monitoring-plugins/check_last_file_date"],
   }
 
   unless empty($naemon_url) {
index 3b39d1f9b39ce924189eccbdb9fdd3a846d0d88b..0caf72ee941475ce2424559246bc4bc2eb4645eb 100644 (file)
@@ -1,4 +1,5 @@
 define profile::monitoring::local_service (
+  Optional[Hash] $sudos  = {},
   Optional[Hash] $common = {},
   Optional[Hash] $master = {},
   Optional[Hash] $local  = {},
@@ -6,6 +7,15 @@ define profile::monitoring::local_service (
   $service_description = $title
   $real_hostname = lookup("base_installation::real_hostname")
 
+  Nagios_service {
+    ensure  => "present",
+    owner   => "naemon",
+    group   => "naemon",
+    notify  => Service["naemon"],
+    before  => Service["naemon"],
+    require => File["/etc/naemon"],
+  }
+
   $service_generic = {
     active_checks_enabled        => "1",
     check_freshness              => "0",
@@ -35,6 +45,12 @@ define profile::monitoring::local_service (
     retry_interval     => "1",
     })
 
+  $sudos.each |$sudo_name, $content| {
+    sudo::conf { $sudo_name:
+      content        => $content,
+      before         => Nagios_service[$service_description],
+    }
+  }
 
   [true, false].each |$services_for_master| {
     if $services_for_master {
index 6e59ab1f91c21937c611f2b6ed3bf93a7e5e09df..b20a3528546c1ad6bf7c142feb099b04cb7c7850 100644 (file)
@@ -1,21 +1,5 @@
 class profile::monitoring::services {
 
-  Nagios_service {
-    ensure  => "present",
-    owner   => "naemon",
-    group   => "naemon",
-    notify  => Service["naemon"],
-    before  => Service["naemon"],
-    require => File["/etc/naemon"],
-  }
-
-  sudo::conf {
-    default:
-      sudo_file_name => "naemon";
-    'naemon-fail2ban':
-      content => "naemon  ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping";
-  }
-
   profile::monitoring::local_service {
     "Size on root partition":
       local => {
@@ -34,9 +18,11 @@ class profile::monitoring::services {
         check_command => "check_local_swap!20!10",
       };
     "fail2ban is active":
-      local => {
+      sudos   => {
+        "naemon-fail2ban" => "naemon  ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping",
+      },
+      local   => {
         check_command => "check_command_output!fail2ban-client ping!pong!-r root",
-        require       => Sudo::Conf["naemon-fail2ban"],
       };
     "NTP is activated and working":
       local => {
@@ -50,4 +36,6 @@ class profile::monitoring::services {
         check_command => "check_md_raid",
       };
   }
+
+  Profile::Monitoring::Local_service <| |>
 }
index 97ce57291b6bbf4abf9de7c2e04932b5dbe04435..edd6ea64d0e9c461d08e24add62e4c908393f7fc 100644 (file)
@@ -28,5 +28,13 @@ class profile::postgresql (
 
   profile::postgresql::base_pg_hba_rules { "default": }
 
+  @profile::monitoring::local_service { "Databases are present in postgresql":
+    sudos => {
+      "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace"
+    },
+    local => {
+      check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace'!public!-r postgres",
+    }
+  }
 }
 
index 53fb20ee0b3b2336ac7e3fd241252ceb09f44833..5e469c5c213c893afef1fb8cd1164c26155e080e 100644 (file)
@@ -57,4 +57,13 @@ define profile::postgresql::backup_dump (
       },
     ]
   }
+
+  # Virtual monitoring service: checks that the newest entry of
+  # $pg_backup_path is less than 10 hours old, listing the folder as $pg_user.
+  # The sudo rule authorises exactly the find invocation made by
+  # check_last_file_date.
+  # NOTE(review): the sudo rule name is constant while this declaration lives
+  # in a define -- confirm the define is instantiated at most once per node.
+  @profile::monitoring::local_service { "Last postgresql dump in $pg_backup_path is not too old":
+    sudos => {
+      "naemon-postgresql-dumps" => "naemon  ALL=($pg_user) NOPASSWD: /usr/bin/find $pg_backup_path -mindepth 1 -maxdepth 1 -printf %T@\\n",
+    },
+    local => {
+      check_command => "check_last_file_date!$pg_backup_path!10!$pg_user",
+    }
+  }
 }
index 45b8ed5a5528a89cfde528dda73bc421441e4b02..c82eefdedf954be3068f68a6888d95488e434d43 100644 (file)
@@ -48,6 +48,16 @@ define profile::postgresql::backup_pgbouncer (
     content => "${pg_infos[pgbouncer_dbname]} = host=$pg_path$pg_port user=${pg_infos[dbuser]} dbname=${pg_infos[dbname]}",
   }
 
+  # FIXME: current pam configuration requires password for postgres
+  # @profile::monitoring::local_service { "Database ${pg_infos[pgbouncer_dbname]} is available in pgbouncer":
+  #   sudos => {
+  #     "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace ${pg_infos[pgbouncer_dbname]}"
+  #   },
+  #   local => {
+  #     check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace' ${pg_infos[pgbouncer_dbname]}!public!-r postgres",
+  #   }
+  # }
+
   # pg_hba for accessed cluster
   postgresql::server::pg_hba_rule { "$pg_backup_host - local access as ${pg_infos[dbuser]} user":
     description => "Allow local access to ${pg_infos[dbuser]} user",
index 02315a615d7e771edaba1e6828582099bd387142..e775eb439b3c0bcf4bff79ec318a25448e8ca5a4 100644 (file)
@@ -59,5 +59,15 @@ define profile::postgresql::master (
       handle_slot   => true,
       add_self_role => true,
     }
+
+    # Virtual monitoring service checking the replication lag for $backup_host.
+    # sudo only authorises the exact argument list written in the rule below,
+    # so the socket directory is spelled identically (no trailing slash) in
+    # the rule and in the check command.
+    # NOTE(review): assumes $USER2$ resolves to /etc/naemon/monitoring-plugins.
+    @profile::monitoring::local_service { "Postgresql replication for $backup_host is up to date":
+      sudos => {
+        "naemon-postgresql-replication-$backup_host" => "naemon ALL=(postgres) NOPASSWD: /etc/naemon/monitoring-plugins/check_postgres_replication $backup_host /run/postgresql 5432",
+      },
+      local => {
+        check_command => "check_postgresql_replication!$backup_host!/run/postgresql!5432",
+      }
+    }
   }
 }
index dc56c0bd61e77cfba9f4c7d294988167d9472bce..9b0a95cf41cd29ec6744e93bdbb7595e33feb51f 100644 (file)
@@ -79,4 +79,5 @@ define profile::postgresql::ssl (
     }
   }
 
+  # FIXME: add monitoring for ssl
 }