git.immae.eu Git - perso/Immae/Config/Nix.git/commitdiff
Add monitoring for backup-2
author Ismaël Bouya <ismael.bouya@normalesup.org>
Sun, 1 Dec 2019 17:25:16 +0000 (18:25 +0100)
committer Ismaël Bouya <ismael.bouya@normalesup.org>
Sun, 1 Dec 2019 17:25:16 +0000 (18:25 +0100)
modules/private/monitoring/conf/specific_backup-2.cfg [new file with mode: 0644]
modules/private/monitoring/conf/specific_eldiron.cfg [new file with mode: 0644]
modules/private/monitoring/default.nix
modules/private/monitoring/plugins/check_last_file_date [new file with mode: 0755]
modules/private/monitoring/plugins/check_postgres_replication [new file with mode: 0755]
modules/private/system/backup-2.nix
modules/private/system/eldiron.nix

diff --git a/modules/private/monitoring/conf/specific_backup-2.cfg b/modules/private/monitoring/conf/specific_backup-2.cfg
new file mode 100644 (file)
index 0000000..ff91322
--- /dev/null
@@ -0,0 +1,36 @@
+# vim: filetype=nagios
+
+# Checks that only make sense on the backup-2 host: free space on the
+# /backup2 partition and freshness of the backups stored under it.
+
+# check_local_disk is not defined in this file. It receives 10%, 5% and the
+# path as arguments (presumably warning/critical thresholds - TODO confirm
+# against its definition).
+define service {
+  service_description    Size on /backup2 partition
+  check_command          check_local_disk!10%!5%!/backup2
+  use                    local-service
+}
+
+# Runs the check_last_file_date plugin through sudo.
+#   $ARG1$  directory to inspect (first plugin argument)
+#   $ARG2$  maximum age in hours of the newest entry (second plugin argument)
+#   $ARG3$  user that sudo switches to before running the plugin
+define command {
+  command_line           /run/wrappers/bin/sudo -u "$ARG3$" $USER2$/check_last_file_date "$ARG1$" "$ARG2$"
+  command_name           check_last_file_date
+}
+
+# The three checks below run as user "backup" with a 14 hour limit.
+define service {
+  service_description    Last backup in /backup2/phare is not too old
+  check_command          check_last_file_date!/backup2/phare!14!backup
+  use                    local-service
+}
+
+define service {
+  service_description    Last backup in /backup2/immae_eu is not too old
+  check_command          check_last_file_date!/backup2/immae_eu!14!backup
+  use                    local-service
+}
+
+define service {
+  service_description    Last backup in /backup2/immae_fr is not too old
+  check_command          check_last_file_date!/backup2/immae_fr!14!backup
+  use                    local-service
+}
+
+# The postgresql dumps are inspected as user "postgres", with a 7 hour limit.
+define service {
+  service_description    Last postgresql dump in /backup2/eldiron/postgresql_backup is not too old
+  check_command          check_last_file_date!/backup2/eldiron/postgresql_backup!7!postgres
+  use                    local-service
+}
diff --git a/modules/private/monitoring/conf/specific_eldiron.cfg b/modules/private/monitoring/conf/specific_eldiron.cfg
new file mode 100644 (file)
index 0000000..fd5a43d
--- /dev/null
@@ -0,0 +1,29 @@
+# vim: filetype=nagios
+#
+# Checks that only make sense on the eldiron host: state of the PostgreSQL
+# replication towards the backup hosts, and the mail queue.
+
+# Runs the check_postgres_replication plugin as user postgres through sudo.
+#   $ARG1$  replication user to look up in pg_stat_replication
+#   $ARG2$  value given to psql -h (a socket directory in the services below)
+#   $ARG3$  value given to psql -p
+define command {
+  command_line  /run/wrappers/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$"
+  command_name  check_postgresql_replication
+}
+
+define service {
+  service_description   Postgresql replication for backup-1 is up to date
+  check_command         check_postgresql_replication!backup-1!/run/postgresql!5432
+  use                   local-service
+}
+
+define service {
+  service_description   Postgresql replication for backup-2 is up to date
+  check_command         check_postgresql_replication!backup-2!/run/postgresql!5432
+  use                   local-service
+}
+
+define service {
+  service_description  mailq is empty
+  use                  local-service
+  check_command        check_mailq
+}
+
+# check_mailq is an external plugin located in $USER1$. Presumably -w 1 and
+# -c 2 are the warning and critical queue sizes - TODO confirm against the
+# plugin's own help, including the meaning of -s.
+define command {
+  command_name        check_mailq
+  command_line        $USER1$/check_mailq -s -w 1 -c 2
+}
diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix
index c5acd4004b63aadc3d4e2164dde5471acace57be..6062abab2e1689e5c02eb57983ad84f0c49e5273 100644 (file)
@@ -1,4 +1,4 @@
-{ config, myconfig, pkgs, lib, ... }:
+{ config, myconfig, pkgs, lib, name, hostFQDN, ... }:
 let
   myplugins = pkgs.runCommand "buildplugins" {
     buildInputs = [ pkgs.makeWrapper pkgs.perl ];
@@ -13,16 +13,57 @@ let
     wrapProgram $out/check_mem.sh --prefix PATH : ${lib.makeBinPath [
       pkgs.gnugrep pkgs.gawk pkgs.procps-ng
     ]}
+    wrapProgram $out/check_postgres_replication --prefix PATH : ${lib.makeBinPath [
+      pkgs.postgresql
+    ]}
     '';
+  # Object definitions handed to the monitoring daemon (used as objectDefs
+  # below). They are the concatenation of:
+  #   - the shared configuration files under ./conf,
+  #   - an inline block whose host name comes from the hostFQDN module
+  #     argument,
+  #   - ./conf/specific_<name>.cfg, only when such a file exists.
+  # NOTE(review): `name` is a module argument; presumably it is the
+  # deployment's machine name (e.g. "backup-2", "eldiron") - confirm.
+  defaultObjects =
+    let specific_file = ./conf + "/specific_" + name + ".cfg";
+    in
+        builtins.readFile ./conf/local_services.cfg
+      + builtins.readFile ./conf/timeperiods.cfg
+      + builtins.readFile ./conf/services.cfg
+      + builtins.readFile ./conf/contacts.cfg
+      + builtins.readFile ./conf/hosts.cfg
+      + ''
+        define command {
+          command_line       ${myplugins}/send_nrdp.sh -u "$USER200$" -t "$USER201$" -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$"
+          command_name       notify-master
+        }
+        define service {
+          service_description  No mdadm array is degraded
+          use                  local-service
+          check_command        check_command_output!${pkgs.mdadm}/bin/mdadm --monitor --scan -1!^$!-s 0 -r root
+        }
+
+        define service {
+          name                local-service
+          use                 generic-service
+          host_name           ${hostFQDN}
+          check_interval      5
+          max_check_attempts  4
+          register            0
+          retry_interval      1
+        }
+        define host {
+          host_name           ${hostFQDN}
+          alias               ${hostFQDN}
+          address             ${hostFQDN}
+          use                 linux-server
+        }
+        ''
+      # Host-specific objects are optional: a host without a
+      # specific_<name>.cfg file simply contributes an empty string.
+      + lib.strings.optionalString (builtins.pathExists specific_file) (builtins.readFile specific_file);
 in
 {
   options = {
-    myServices.monitoring.enable = lib.mkOption {
-      type = lib.types.bool;
-      default = false;
-      description = ''
-        Whether to enable monitoring.
-      '';
+    myServices.monitoring = {
+      enable = lib.mkOption {
+        type = lib.types.bool;
+        default = false;
+        description = ''
+          Whether to enable monitoring.
+        '';
+      };
     };
   };
 
@@ -39,6 +80,21 @@ in
         users = [ "naemon" ];
         runAs = "root";
       }
+      # Lets the naemon user run, without a password and as user postgres,
+      # the replication check (with any arguments) and the file-date check
+      # on paths below /backup2/.
+      {
+        commands = [
+          { command = "${myplugins}/check_postgres_replication *"; options = [ "NOPASSWD" ]; }
+          { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
+        ];
+        users = [ "naemon" ];
+        runAs = "postgres";
+      }
+      # Same file-date check, this time run as user backup.
+      {
+        commands = [
+          { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
+        ];
+        users = [ "naemon" ];
+        runAs = "backup";
+      }
     ];
     environment.etc."mdadm.conf" = {
       enable = true;
@@ -66,49 +122,7 @@ in
         $USER200$=${myconfig.env.monitoring.status_url}
         $USER201$=${myconfig.env.monitoring.status_token}
       '';
-      objectDefs = builtins.readFile ./conf/local_services.cfg
-        + builtins.readFile ./conf/timeperiods.cfg
-        + builtins.readFile ./conf/services.cfg
-        + builtins.readFile ./conf/contacts.cfg
-        + builtins.readFile ./conf/hosts.cfg
-        + ''
-          define command {
-            command_line       ${myplugins}/send_nrdp.sh -u "$USER200$" -t "$USER201$" -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$"
-            command_name       notify-master
-          }
-          define service {
-            service_description  No mdadm array is degraded
-            use                  local-service
-            check_command        check_command_output!${pkgs.mdadm}/bin/mdadm --monitor --scan -1!^$!-s 0 -r root
-          }
-
-          define service {
-            service_description  mailq is empty
-            use                  local-service
-            check_command        check_mailq
-          }
-
-          define command {
-            command_name        check_mailq
-            command_line        $USER1$/check_mailq -s -w 1 -c 2
-          }
-
-          define service {
-            name                local-service
-            use                 generic-service
-            host_name           eldiron.immae.eu
-            check_interval      5
-            max_check_attempts  4
-            register            0
-            retry_interval      1
-          }
-          define host {
-            host_name           eldiron.immae.eu
-            alias               eldiron.immae.eu
-            address             eldiron.immae.eu
-            use                 linux-server
-          }
-          '';
+      objectDefs = defaultObjects;
     };
   };
 }
diff --git a/modules/private/monitoring/plugins/check_last_file_date b/modules/private/monitoring/plugins/check_last_file_date
new file mode 100755 (executable)
index 0000000..df45bbc
--- /dev/null
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Nagios/Naemon plugin: checks that the most recently modified entry found
+# directly inside a directory is not older than a given number of hours.
+#
+# Usage: check_last_file_date <base_path> <hours>
+#   base_path  directory whose direct children are examined
+#   hours      maximum accepted age of the newest child, in whole hours
+#
+# Exit status:
+#   0 (OK)        the newest child was modified less than <hours> hours ago
+#   2 (CRITICAL)  the newest child is at least that old
+#   3 (UNKNOWN)   bad arguments, or no child could be listed (the directory
+#                 is missing, unreadable or empty)
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+base_path=$1
+hours=$2
+
+# Refuse to guess when an argument is missing or the age is not a plain
+# number: date(1) would otherwise be handed a relative date it may reject or
+# silently misinterpret.
+if [ -z "$base_path" ] || ! [[ "$hours" =~ ^[0-9]+$ ]]; then
+  echo "UNKNOWN: Usage: check_last_file_date <base_path> <hours>"
+  exit $STATE_UNKNOWN
+fi
+
+# %T@ prints each child's modification time as fractional seconds since the
+# epoch. Quoting keeps paths containing spaces or glob characters intact, and
+# sort -n compares the timestamps as numbers rather than as strings.
+last_date=$(find "$base_path" -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort -n | tail -n 1)
+
+if [ -z "$last_date" ]; then
+  echo "UNKNOWN: Could not read folder"
+  exit $STATE_UNKNOWN
+else
+  # Round to whole seconds. The C locale makes printf accept "." as the
+  # decimal separator whatever locale the caller runs under.
+  LC_ALL=C
+  last_date=$(printf "%.*f" 0 "$last_date")
+  min_date=$(date -d "$hours hours ago" "+%s")
+  if [ "$min_date" -lt "$last_date" ]; then
+    echo "OK: Last file $(date -d "@$last_date")"
+    exit $STATE_OK
+  else
+    echo "CRITICAL: Last file $(date -d "@$last_date")"
+    exit $STATE_CRITICAL
+  fi
+fi
diff --git a/modules/private/monitoring/plugins/check_postgres_replication b/modules/private/monitoring/plugins/check_postgres_replication
new file mode 100755 (executable)
index 0000000..009b4d5
--- /dev/null
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Nagios/Naemon plugin: checks the replay lag of a PostgreSQL replication
+# client, as reported by the pg_stat_replication view.
+#
+# Usage: check_postgres_replication <user> <host> <port>
+#   user  replication role to look up (pg_stat_replication.usename)
+#   host  value given to psql -h (host name or socket directory)
+#   port  value given to psql -p
+#
+# Exit status:
+#   0 (OK)        lag below 5 seconds
+#   1 (WARNING)   lag below 10 seconds
+#   2 (CRITICAL)  lag of 10 seconds or more
+#   3 (UNKNOWN)   bad arguments, psql failure, or no replication connection
+#                 found for <user>
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+if [[ $# -lt 3 ]]; then
+  echo "UNKNOWN - Usage: check_postgres_replication <user> <host> <port>"
+  exit $STATE_UNKNOWN
+fi
+
+user=$1
+host=$2
+port=$3
+
+# The role name is handed to psql as a variable and expanded with :'...',
+# which quotes it as an SQL literal, instead of being spliced into the
+# statement text. psql does not expand variables in -c commands, so the
+# statement is fed on standard input.
+# MAX() folds several connections of the same role into the worst lag; with
+# no matching row it yields NULL, which psql prints as an empty string.
+query="SELECT MAX(COALESCE(EXTRACT(EPOCH FROM replay_lag),0)) FROM pg_stat_replication WHERE usename = :'repl_user';"
+
+# -X skips any psqlrc, -q silences informational messages, and ON_ERROR_STOP
+# makes psql return a non-zero status when the statement itself fails.
+lag=$(psql -X -q -A -t -v ON_ERROR_STOP=1 -v repl_user="$user" -h "$host" -p "$port" <<< "$query" 2>/dev/null)
+exit_code=$?
+
+if [[ $exit_code -ne 0 ]]; then
+  echo "UNKNOWN - Impossible to run psql command"
+  exit $STATE_UNKNOWN
+elif [[ -z "$lag" ]]; then
+  echo "UNKNOWN - No replication found for $user"
+  exit $STATE_UNKNOWN
+else
+  # Keep the exact value for the message, then round it for the integer
+  # comparisons below. The C locale makes printf accept "." as the decimal
+  # separator.
+  output="Replication lag for $user is ${lag}s"
+  LC_ALL=C
+  lag=$(printf "%.*f" 0 "$lag")
+
+  if [[ $lag -lt 5 ]]; then
+    echo "OK - $output"
+    exit $STATE_OK
+  elif [[ $lag -lt 10 ]]; then
+    echo "WARNING - $output"
+    exit $STATE_WARNING
+  else
+    echo "CRITICAL - $output"
+    exit $STATE_CRITICAL
+  fi
+fi
diff --git a/modules/private/system/backup-2.nix b/modules/private/system/backup-2.nix
index 1c5b7d84b383cebbd86a839c3872d5f1dd670d7c..3120a5715086ddbc96b3bedec54522eaccd36c48 100644 (file)
@@ -3,6 +3,7 @@
 {
   boot.kernelPackages = pkgs.linuxPackages_latest;
   _module.args.privateFiles = privateFiles;
+  _module.args.hostFQDN = "backup-2.v.immae.eu";
   imports = builtins.attrValues (import ../..);
 
   deployment = {
@@ -48,6 +49,7 @@
     ssh_key_private = myconfig.env.rsync_backup.ssh_key.private;
   };
 
+  myServices.monitoring.enable = true;
   myServices.databasesReplication = {
     postgresql = {
       enable = true;
diff --git a/modules/private/system/eldiron.nix b/modules/private/system/eldiron.nix
index 5384bc2e8c418c4bbf0bd26e38be90527100e849..d79cf166dc5e889170988a8f5e1639b1201a5fb7 100644 (file)
@@ -3,6 +3,7 @@
 {
   boot.kernelPackages = pkgs.linuxPackages_latest;
   _module.args.privateFiles = privateFiles;
+  _module.args.hostFQDN = "eldiron.immae.eu";
 
   networking = {
     firewall.enable = true;