]> git.immae.eu Git - perso/Immae/Config/Nix.git/commitdiff
Add mysql and redis monitoring
authorIsmaël Bouya <ismael.bouya@normalesup.org>
Sat, 7 Dec 2019 14:40:15 +0000 (15:40 +0100)
committerIsmaël Bouya <ismael.bouya@normalesup.org>
Sat, 7 Dec 2019 14:40:15 +0000 (15:40 +0100)
modules/private/databases/mariadb_replication.nix
modules/private/monitoring/default.nix
modules/private/monitoring/objects_backup-2.nix
modules/private/monitoring/objects_common.nix
modules/private/monitoring/plugins/check_mysql_replication [new file with mode: 0755]
modules/private/monitoring/plugins/check_redis_replication [new file with mode: 0755]

index 5f97e84aa4bea632dbe7b6b944ef618741c5b15f..52a34d3e20d9fc91671f7e7b766f93bce205f385 100644 (file)
@@ -114,6 +114,17 @@ in
           password = ${hcfg.dumpPassword}
         '';
       }
           password = ${hcfg.dumpPassword}
         '';
       }
+      {
+        dest = "mysql_replication/${name}/client";
+        permissions = "0400";
+        user = "mysql";
+        group = "mysql";
+        text = ''
+          [client]
+          user = ${hcfg.dumpUser}
+          password = ${hcfg.dumpPassword}
+        '';
+      }
     ]) cfg.hosts);
 
     services.cron = {
     ]) cfg.hosts);
 
     services.cron = {
index baeebc9e5231583945300a7a9ccfbd73d8131ae8..b3f8cbee734cf53f3c672e88171a2f036282fc4d 100644 (file)
@@ -16,6 +16,12 @@ let
     wrapProgram $out/check_postgres_replication --prefix PATH : ${lib.makeBinPath [
       pkgs.postgresql
     ]}
     wrapProgram $out/check_postgres_replication --prefix PATH : ${lib.makeBinPath [
       pkgs.postgresql
     ]}
+    wrapProgram $out/check_redis_replication --prefix PATH : ${lib.makeBinPath [
+      pkgs.gnugrep pkgs.coreutils pkgs.redis
+    ]}
+    wrapProgram $out/check_mysql_replication --prefix PATH : ${lib.makeBinPath [
+      pkgs.gnugrep pkgs.gnused pkgs.coreutils pkgs.mariadb
+    ]}
     '';
   toObjects = pkgs.callPackage ./to_objects.nix {};
   commonConfig = {
     '';
   toObjects = pkgs.callPackage ./to_objects.nix {};
   commonConfig = {
@@ -74,11 +80,20 @@ in
       }
       {
         commands = [
       }
       {
         commands = [
+          { command = "${myplugins}/check_mysql_replication *"; options = [ "NOPASSWD" ]; }
           { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
         ];
         users = [ "naemon" ];
         runAs = "mysql";
       }
           { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
         ];
         users = [ "naemon" ];
         runAs = "mysql";
       }
+      {
+        commands = [
+          { command = "${myplugins}/check_redis_replication *"; options = [ "NOPASSWD" ]; }
+          { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
+        ];
+        users = [ "naemon" ];
+        runAs = "redis";
+      }
       {
         commands = [
           { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
       {
         commands = [
           { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
index fad67630f693256b728df21610524625cb599106..c302e453f08663f74fa26514beb688074e9bc914 100644 (file)
       use = "local-service";
       check_command = ["check_last_file_date" "/backup2/eldiron/postgresql_backup" "7" "postgres"];
     }
       use = "local-service";
       check_command = ["check_last_file_date" "/backup2/eldiron/postgresql_backup" "7" "postgres"];
     }
+    {
+      service_description = "Redis replication for eldiron is up to date";
+      use = "local-service";
+      check_command = ["check_redis_replication" "/run/redis_eldiron/redis.sock"];
+    }
+    {
+      service_description = "Last redis dump in /backup2/eldiron/redis_backup is not too old";
+      use = "local-service";
+      check_command = ["check_last_file_date" "/backup2/eldiron/redis_backup" "7" "redis"];
+    }
+    {
+      service_description = "Mysql replication for eldiron is up to date";
+      use = "local-service";
+      check_command = ["check_mysql_replication" "/run/mysqld_eldiron/mysqld.sock" "/var/secrets/mysql_replication/eldiron/client"];
+    }
     {
       service_description = "Last mysql dump in /backup2/eldiron/mysql_backup is not too old";
       use = "local-service";
     {
       service_description = "Last mysql dump in /backup2/eldiron/mysql_backup is not too old";
       use = "local-service";
index 8466fdb277a5a45fcc49aaf5119adaaddc075a14..1ab9fc3e179d3ced392eaefe714526c1c3486a67 100644 (file)
@@ -74,7 +74,9 @@
     check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
     check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";
     check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org";
     check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
     check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";
     check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org";
+    check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\"";
     check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\"";
     check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\"";
+    check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\"";
     check_mailq = "$USER1$/check_mailq -s -w 1 -c 2";
 
     check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5";
     check_mailq = "$USER1$/check_mailq -s -w 1 -c 2";
 
     check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5";
diff --git a/modules/private/monitoring/plugins/check_mysql_replication b/modules/private/monitoring/plugins/check_mysql_replication
new file mode 100755 (executable)
index 0000000..8923928
--- /dev/null
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+socket=$1
+config_file=$2
+info=$(mysql --defaults-file=${config_file} -S $socket -e "show slave status" --vertical)
+exit_code=$?
+
+lag=$(echo "$info" | grep Seconds_Behind_Master | cut -d':' -f2 | sed -e "s/\s//g")
+
+if [[ $exit_code -ne 0 ]]; then
+  echo "UNKNOWN - Impossible to run mysql command"
+  exit $STATE_UNKNOWN
+elif [[ -z "$lag" ]]; then
+  echo "UNKNOWN - No replication found for mysql"
+  exit $STATE_UNKNOWN
+else
+  output="Replication lag for mysql is ${lag}s"
+  LC_ALL=C lag=$(printf "%.*f" 0 $lag)
+
+  if [[ $lag -lt 5 ]]; then
+    echo "OK - $output"
+    exit $STATE_OK
+  elif [[ $lag -lt 10 ]]; then
+    echo "WARNING - $output"
+    exit $STATE_WARNING
+  else
+    echo "CRITICAL - $output"
+    exit $STATE_CRITICAL
+  fi
+fi
diff --git a/modules/private/monitoring/plugins/check_redis_replication b/modules/private/monitoring/plugins/check_redis_replication
new file mode 100755 (executable)
index 0000000..7a884e1
--- /dev/null
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+socket=$1
+
+info=$(redis-cli -s $socket info replication)
+lag=$(echo "$info" | grep master_last_io_seconds_ago | cut -d":" -f2 | sed -e "s/\s//g")
+slave_offset=$(echo "$info" | grep slave_repl_offset | cut -d":" -f2 | sed -e "s/\s//g")
+master_offset=$(echo "$info" | grep master_repl_offset | cut -d":" -f2 | sed -e "s/\s//g")
+offset=$(($master_offset - $slave_offset))
+
+exit_code=$?
+
+if [[ $exit_code -ne 0 ]]; then
+  echo "UNKNOWN - Impossible to run redis command"
+  exit $STATE_UNKNOWN
+elif [[ -z "$lag" ]]; then
+  echo "UNKNOWN - No replication found"
+  exit $STATE_UNKNOWN
+else
+  output="Replication lag for redis is ${lag}s and offset is ${offset}"
+  LC_ALL=C lag=$(printf "%.*f" 0 $lag)
+
+  if [[ $lag -lt 5 && $offset -lt 5 ]]; then
+    echo "OK - $output"
+    exit $STATE_OK
+  elif [[ $lag -lt 10 && $offset -lt 10 ]]; then
+    echo "WARNING - $output"
+    exit $STATE_WARNING
+  else
+    echo "CRITICAL - $output"
+    exit $STATE_CRITICAL
+  fi
+fi