From 6015a3b52c3b155ac444aeb39950c38a5e653101 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Isma=C3=ABl=20Bouya?= Date: Sat, 7 Dec 2019 15:40:15 +0100 Subject: [PATCH] Add mysql and redis monitoring --- .../private/databases/mariadb_replication.nix | 11 ++++++ modules/private/monitoring/default.nix | 15 ++++++++ .../private/monitoring/objects_backup-2.nix | 15 ++++++++ modules/private/monitoring/objects_common.nix | 2 + .../plugins/check_mysql_replication | 35 +++++++++++++++++ .../plugins/check_redis_replication | 38 +++++++++++++++++++ 6 files changed, 116 insertions(+) create mode 100755 modules/private/monitoring/plugins/check_mysql_replication create mode 100755 modules/private/monitoring/plugins/check_redis_replication diff --git a/modules/private/databases/mariadb_replication.nix b/modules/private/databases/mariadb_replication.nix index 5f97e84..52a34d3 100644 --- a/modules/private/databases/mariadb_replication.nix +++ b/modules/private/databases/mariadb_replication.nix @@ -114,6 +114,17 @@ in password = ${hcfg.dumpPassword} ''; } + { + dest = "mysql_replication/${name}/client"; + permissions = "0400"; + user = "mysql"; + group = "mysql"; + text = '' + [client] + user = ${hcfg.dumpUser} + password = ${hcfg.dumpPassword} + ''; + } ]) cfg.hosts); services.cron = { diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix index baeebc9..b3f8cbe 100644 --- a/modules/private/monitoring/default.nix +++ b/modules/private/monitoring/default.nix @@ -16,6 +16,12 @@ let wrapProgram $out/check_postgres_replication --prefix PATH : ${lib.makeBinPath [ pkgs.postgresql ]} + wrapProgram $out/check_redis_replication --prefix PATH : ${lib.makeBinPath [ + pkgs.gnugrep pkgs.coreutils pkgs.redis + ]} + wrapProgram $out/check_mysql_replication --prefix PATH : ${lib.makeBinPath [ + pkgs.gnugrep pkgs.gnused pkgs.coreutils pkgs.mariadb + ]} ''; toObjects = pkgs.callPackage ./to_objects.nix {}; commonConfig = { @@ -74,11 +80,20 @@ in } { commands = [ + { 
command = "${myplugins}/check_mysql_replication *"; options = [ "NOPASSWD" ]; } { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; } ]; users = [ "naemon" ]; runAs = "mysql"; } + { + commands = [ + { command = "${myplugins}/check_redis_replication *"; options = [ "NOPASSWD" ]; } + { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; } + ]; + users = [ "naemon" ]; + runAs = "redis"; + } { commands = [ { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; } diff --git a/modules/private/monitoring/objects_backup-2.nix b/modules/private/monitoring/objects_backup-2.nix index fad6763..c302e45 100644 --- a/modules/private/monitoring/objects_backup-2.nix +++ b/modules/private/monitoring/objects_backup-2.nix @@ -26,6 +26,21 @@ use = "local-service"; check_command = ["check_last_file_date" "/backup2/eldiron/postgresql_backup" "7" "postgres"]; } + { + service_description = "Redis replication for eldiron is up to date"; + use = "local-service"; + check_command = ["check_redis_replication" "/run/redis_eldiron/redis.sock"]; + } + { + service_description = "Last redis dump in /backup2/eldiron/redis_backup is not too old"; + use = "local-service"; + check_command = ["check_last_file_date" "/backup2/eldiron/redis_backup" "7" "redis"]; + } + { + service_description = "Mysql replication for eldiron is up to date"; + use = "local-service"; + check_command = ["check_mysql_replication" "/run/mysqld_eldiron/mysqld.sock" "/var/secrets/mysql_replication/eldiron/client"]; + } { service_description = "Last mysql dump in /backup2/eldiron/mysql_backup is not too old"; use = "local-service"; diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix index 8466fdb..1ab9fc3 100644 --- a/modules/private/monitoring/objects_common.nix +++ b/modules/private/monitoring/objects_common.nix @@ -74,7 +74,9 @@ check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c 
$ARG2$"; check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org"; + check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\""; check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\""; + check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\""; check_mailq = "$USER1$/check_mailq -s -w 1 -c 2"; check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5";
diff --git a/modules/private/monitoring/plugins/check_mysql_replication b/modules/private/monitoring/plugins/check_mysql_replication
new file mode 100755
index 0000000..8923928
--- /dev/null
+++ b/modules/private/monitoring/plugins/check_mysql_replication
@@ -0,0 +1,56 @@
+#!/bin/bash
+#
+# Naemon plugin: report the replication lag of a MariaDB/MySQL replica.
+#
+# Usage: check_mysql_replication SOCKET CONFIG_FILE
+#   SOCKET       unix socket of the replica to query
+#   CONFIG_FILE  client option file passed as --defaults-file (credentials)
+#
+# Exit status, based on Seconds_Behind_Master from "show slave status":
+#   OK        lag below 5 seconds
+#   WARNING   lag below 10 seconds
+#   CRITICAL  lag of 10 seconds or more, or lag is NULL
+#   UNKNOWN   the mysql command failed or returned no replication status
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+socket=$1
+config_file=$2
+
+# Quote both paths so they reach mysql as single arguments.
+info=$(mysql --defaults-file="${config_file}" -S "$socket" -e "show slave status" --vertical)
+# Must be read right after the command substitution, before anything else runs.
+exit_code=$?
+
+lag=$(echo "$info" | grep Seconds_Behind_Master | cut -d':' -f2 | sed -e "s/\s//g")
+
+if [[ $exit_code -ne 0 ]]; then
+  echo "UNKNOWN - Impossible to run mysql command"
+  exit $STATE_UNKNOWN
+elif [[ -z "$lag" ]]; then
+  echo "UNKNOWN - No replication found for mysql"
+  exit $STATE_UNKNOWN
+elif [[ "$lag" == "NULL" ]]; then
+  # NULL is reported when the replication threads are not running; the
+  # printf below would turn it into 0 and the check would answer OK.
+  echo "CRITICAL - Replication for mysql is not running"
+  exit $STATE_CRITICAL
+else
+  output="Replication lag for mysql is ${lag}s"
+  # Round to an integer so the numeric comparisons below are valid.
+  LC_ALL=C lag=$(printf "%.*f" 0 "$lag")
+
+  if [[ $lag -lt 5 ]]; then
+    echo "OK - $output"
+    exit $STATE_OK
+  elif [[ $lag -lt 10 ]]; then
+    echo "WARNING - $output"
+    exit $STATE_WARNING
+  else
+    echo "CRITICAL - $output"
+    exit $STATE_CRITICAL
+  fi
+fi
diff --git a/modules/private/monitoring/plugins/check_redis_replication b/modules/private/monitoring/plugins/check_redis_replication
new file mode 100755
index 0000000..7a884e1
--- /dev/null
+++ b/modules/private/monitoring/plugins/check_redis_replication
@@ -0,0 +1,63 @@
+#!/bin/bash
+#
+# Naemon plugin: report the replication state of a Redis replica.
+#
+# Usage: check_redis_replication SOCKET
+#   SOCKET  unix socket of the replica to query
+#
+# Exit status, based on the output of "info replication":
+#   OK        seconds since last master I/O and offset difference both below 5
+#   WARNING   seconds since last master I/O and offset difference both below 10
+#   CRITICAL  anything above, or the link to the master is not up
+#   UNKNOWN   redis-cli failed or returned no replication fields
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+socket=$1
+
+# Print the value of field $1 from the "info replication" output.
+# Uses tr (coreutils) to drop all whitespace: the wrapper in default.nix only
+# adds grep, coreutils and redis to PATH, so sed is not guaranteed to exist.
+info_field() {
+  echo "$info" | grep "^$1:" | cut -d":" -f2 | tr -d '[:space:]'
+}
+
+info=$(redis-cli -s "$socket" info replication)
+# Capture the status of redis-cli itself, before any other command runs.
+exit_code=$?
+
+lag=$(info_field master_last_io_seconds_ago)
+link=$(info_field master_link_status)
+slave_offset=$(info_field slave_repl_offset)
+master_offset=$(info_field master_repl_offset)
+
+if [[ $exit_code -ne 0 ]]; then
+  echo "UNKNOWN - Impossible to run redis command"
+  exit $STATE_UNKNOWN
+elif [[ -z "$lag" || -z "$slave_offset" || -z "$master_offset" ]]; then
+  echo "UNKNOWN - No replication found"
+  exit $STATE_UNKNOWN
+elif [[ "$link" != "up" ]]; then
+  # With the link down master_last_io_seconds_ago is -1, which would
+  # otherwise pass the "less than 5" test below.
+  echo "CRITICAL - Replication link for redis is ${link:-unknown}"
+  exit $STATE_CRITICAL
+else
+  # Only computed once both offsets are known to be present.
+  offset=$((master_offset - slave_offset))
+  output="Replication lag for redis is ${lag}s and offset is ${offset}"
+  LC_ALL=C lag=$(printf "%.*f" 0 "$lag")
+
+  if [[ $lag -lt 5 && $offset -lt 5 ]]; then
+    echo "OK - $output"
+    exit $STATE_OK
+  elif [[ $lag -lt 10 && $offset -lt 10 ]]; then
+    echo "WARNING - $output"
+    exit $STATE_WARNING
+  else
+    echo "CRITICAL - $output"
+    exit $STATE_CRITICAL
+  fi
+fi
-- 
2.41.0