aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ismaël Bouya <ismael.bouya@normalesup.org> 2019-12-07 15:40:15 +0100
committer Ismaël Bouya <ismael.bouya@normalesup.org> 2019-12-07 15:40:15 +0100
commit 6015a3b52c3b155ac444aeb39950c38a5e653101 (patch)
tree 47ab7a69ddeaedab40a20b0c5cb8a7cc98b0f1f1
parent dded66995529a0419cc56778f4ebb4247c2ab765 (diff)
download Nix-6015a3b52c3b155ac444aeb39950c38a5e653101.tar.gz
Nix-6015a3b52c3b155ac444aeb39950c38a5e653101.tar.zst
Nix-6015a3b52c3b155ac444aeb39950c38a5e653101.zip
Add mysql and redis monitoring
-rw-r--r-- modules/private/databases/mariadb_replication.nix 11
-rw-r--r-- modules/private/monitoring/default.nix 15
-rw-r--r-- modules/private/monitoring/objects_backup-2.nix 15
-rw-r--r-- modules/private/monitoring/objects_common.nix 2
-rwxr-xr-x modules/private/monitoring/plugins/check_mysql_replication 35
-rwxr-xr-x modules/private/monitoring/plugins/check_redis_replication 38
6 files changed, 116 insertions, 0 deletions
diff --git a/modules/private/databases/mariadb_replication.nix b/modules/private/databases/mariadb_replication.nix
index 5f97e84..52a34d3 100644
--- a/modules/private/databases/mariadb_replication.nix
+++ b/modules/private/databases/mariadb_replication.nix
@@ -114,6 +114,17 @@ in
114 password = ${hcfg.dumpPassword} 114 password = ${hcfg.dumpPassword}
115 ''; 115 '';
116 } 116 }
117 {
118 dest = "mysql_replication/${name}/client";
119 permissions = "0400";
120 user = "mysql";
121 group = "mysql";
122 text = ''
123 [client]
124 user = ${hcfg.dumpUser}
125 password = ${hcfg.dumpPassword}
126 '';
127 }
117 ]) cfg.hosts); 128 ]) cfg.hosts);
118 129
119 services.cron = { 130 services.cron = {
diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix
index baeebc9..b3f8cbe 100644
--- a/modules/private/monitoring/default.nix
+++ b/modules/private/monitoring/default.nix
@@ -16,6 +16,12 @@ let
16 wrapProgram $out/check_postgres_replication --prefix PATH : ${lib.makeBinPath [ 16 wrapProgram $out/check_postgres_replication --prefix PATH : ${lib.makeBinPath [
17 pkgs.postgresql 17 pkgs.postgresql
18 ]} 18 ]}
19 wrapProgram $out/check_redis_replication --prefix PATH : ${lib.makeBinPath [
20 pkgs.gnugrep pkgs.coreutils pkgs.redis
21 ]}
22 wrapProgram $out/check_mysql_replication --prefix PATH : ${lib.makeBinPath [
23 pkgs.gnugrep pkgs.gnused pkgs.coreutils pkgs.mariadb
24 ]}
19 ''; 25 '';
20 toObjects = pkgs.callPackage ./to_objects.nix {}; 26 toObjects = pkgs.callPackage ./to_objects.nix {};
21 commonConfig = { 27 commonConfig = {
@@ -74,6 +80,7 @@ in
74 } 80 }
75 { 81 {
76 commands = [ 82 commands = [
83 { command = "${myplugins}/check_mysql_replication *"; options = [ "NOPASSWD" ]; }
77 { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; } 84 { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
78 ]; 85 ];
79 users = [ "naemon" ]; 86 users = [ "naemon" ];
@@ -81,6 +88,14 @@ in
81 } 88 }
82 { 89 {
83 commands = [ 90 commands = [
91 { command = "${myplugins}/check_redis_replication *"; options = [ "NOPASSWD" ]; }
92 { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
93 ];
94 users = [ "naemon" ];
95 runAs = "redis";
96 }
97 {
98 commands = [
84 { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; } 99 { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
85 ]; 100 ];
86 users = [ "naemon" ]; 101 users = [ "naemon" ];
diff --git a/modules/private/monitoring/objects_backup-2.nix b/modules/private/monitoring/objects_backup-2.nix
index fad6763..c302e45 100644
--- a/modules/private/monitoring/objects_backup-2.nix
+++ b/modules/private/monitoring/objects_backup-2.nix
@@ -27,6 +27,21 @@
27 check_command = ["check_last_file_date" "/backup2/eldiron/postgresql_backup" "7" "postgres"]; 27 check_command = ["check_last_file_date" "/backup2/eldiron/postgresql_backup" "7" "postgres"];
28 } 28 }
29 { 29 {
30 service_description = "Redis replication for eldiron is up to date";
31 use = "local-service";
32 check_command = ["check_redis_replication" "/run/redis_eldiron/redis.sock"];
33 }
34 {
35 service_description = "Last redis dump in /backup2/eldiron/redis_backup is not too old";
36 use = "local-service";
37 check_command = ["check_last_file_date" "/backup2/eldiron/redis_backup" "7" "redis"];
38 }
39 {
40 service_description = "Mysql replication for eldiron is up to date";
41 use = "local-service";
42 check_command = ["check_mysql_replication" "/run/mysqld_eldiron/mysqld.sock" "/var/secrets/mysql_replication/eldiron/client"];
43 }
44 {
30 service_description = "Last mysql dump in /backup2/eldiron/mysql_backup is not too old"; 45 service_description = "Last mysql dump in /backup2/eldiron/mysql_backup is not too old";
31 use = "local-service"; 46 use = "local-service";
32 check_command = ["check_last_file_date" "/backup2/eldiron/mysql_backup" "7" "mysql"]; 47 check_command = ["check_last_file_date" "/backup2/eldiron/mysql_backup" "7" "mysql"];
diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix
index 8466fdb..1ab9fc3 100644
--- a/modules/private/monitoring/objects_common.nix
+++ b/modules/private/monitoring/objects_common.nix
@@ -74,7 +74,9 @@
74 check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$"; 74 check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
75 check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; 75 check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";
76 check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org"; 76 check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org";
77 check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\"";
77 check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\""; 78 check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\"";
79 check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\"";
78 check_mailq = "$USER1$/check_mailq -s -w 1 -c 2"; 80 check_mailq = "$USER1$/check_mailq -s -w 1 -c 2";
79 81
80 check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5"; 82 check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5";
diff --git a/modules/private/monitoring/plugins/check_mysql_replication b/modules/private/monitoring/plugins/check_mysql_replication
new file mode 100755
index 0000000..8923928
--- /dev/null
+++ b/modules/private/monitoring/plugins/check_mysql_replication
@@ -0,0 +1,35 @@
#!/bin/bash
#
# Monitoring plugin: reports how far a MySQL/MariaDB replica lags behind
# its master, based on the Seconds_Behind_Master field of
# "show slave status".
#
# Usage: check_mysql_replication SOCKET CONFIG_FILE
#   SOCKET       unix socket of the replica instance (passed to mysql -S)
#   CONFIG_FILE  client option file (passed as --defaults-file)
#
# Output: a single status line on stdout.
# Exit status:
#   0 (OK)        lag below 5 seconds
#   1 (WARNING)   lag below 10 seconds
#   2 (CRITICAL)  lag of 10 seconds or more, or replication not running
#   3 (UNKNOWN)   bad usage, mysql failure, or no/unparseable lag value

readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

socket=${1:-}
config_file=${2:-}

if [[ -z "$socket" || -z "$config_file" ]]; then
  echo "UNKNOWN - Usage: ${0##*/} socket config_file"
  exit "$STATE_UNKNOWN"
fi

info=$(mysql --defaults-file="$config_file" -S "$socket" -e "show slave status" --vertical)
exit_code=$?

# Check the client status before looking at its output: nothing in $info
# is meaningful if the query itself failed.
if [[ $exit_code -ne 0 ]]; then
  echo "UNKNOWN - Impossible to run mysql command"
  exit "$STATE_UNKNOWN"
fi

# --vertical prints one "Field: value" pair per line; keep the value only,
# with all whitespace removed.
lag=$(grep -m1 'Seconds_Behind_Master:' <<<"$info" \
  | cut -d':' -f2 \
  | sed -e 's/[[:space:]]//g')

if [[ -z "$lag" ]]; then
  echo "UNKNOWN - No replication found for mysql"
  exit "$STATE_UNKNOWN"
fi

# The server reports NULL when the replication threads are not running.
# This must not be converted to a number: printf would turn it into 0 and
# a stopped replication would be reported as OK.
if [[ "$lag" == "NULL" ]]; then
  echo "CRITICAL - Replication for mysql is not running (lag is NULL)"
  exit "$STATE_CRITICAL"
fi

if [[ ! "$lag" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
  echo "UNKNOWN - Unexpected replication lag value for mysql: ${lag}"
  exit "$STATE_UNKNOWN"
fi

output="Replication lag for mysql is ${lag}s"
# Normalise to an integer for the comparisons below; LC_ALL=C keeps the
# decimal separator handling locale-independent.
lag=$(LC_ALL=C printf '%.0f' "$lag")

if (( lag < 5 )); then
  echo "OK - $output"
  exit "$STATE_OK"
elif (( lag < 10 )); then
  echo "WARNING - $output"
  exit "$STATE_WARNING"
else
  echo "CRITICAL - $output"
  exit "$STATE_CRITICAL"
fi
diff --git a/modules/private/monitoring/plugins/check_redis_replication b/modules/private/monitoring/plugins/check_redis_replication
new file mode 100755
index 0000000..7a884e1
--- /dev/null
+++ b/modules/private/monitoring/plugins/check_redis_replication
@@ -0,0 +1,38 @@
#!/bin/bash
#
# Monitoring plugin: reports the replication state of a Redis replica,
# based on the output of "info replication".
#
# Usage: check_redis_replication SOCKET
#   SOCKET  unix socket of the replica instance (passed to redis-cli -s)
#
# Two values are checked:
#   lag     master_last_io_seconds_ago
#   offset  master_repl_offset - slave_repl_offset
#
# Output: a single status line on stdout.
# Exit status:
#   0 (OK)        lag and offset both below 5
#   1 (WARNING)   lag and offset both below 10
#   2 (CRITICAL)  lag or offset of 10 or more, or link to master down
#   3 (UNKNOWN)   bad usage, redis-cli failure, or no/unparseable values

readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

socket=${1:-}

if [[ -z "$socket" ]]; then
  echo "UNKNOWN - Usage: ${0##*/} socket"
  exit "$STATE_UNKNOWN"
fi

info=$(redis-cli -s "$socket" info replication)
# Capture the status right after redis-cli: any command run in between
# would overwrite $?.
exit_code=$?

if [[ $exit_code -ne 0 ]]; then
  echo "UNKNOWN - Impossible to run redis command"
  exit "$STATE_UNKNOWN"
fi

# Prints the value of the "NAME:value" line of $info, with all whitespace
# (including the trailing carriage return sent by redis) removed.
# Arguments: $1 - field name
info_field() {
  grep -m1 "^$1:" <<<"$info" | cut -d':' -f2 | sed -e 's/[[:space:]]//g'
}

lag=$(info_field master_last_io_seconds_ago)
slave_offset=$(info_field slave_repl_offset)
master_offset=$(info_field master_repl_offset)

# A node that is not a replica has no master_last_io_seconds_ago field.
if [[ -z "$lag" ]]; then
  echo "UNKNOWN - No replication found"
  exit "$STATE_UNKNOWN"
fi

# Validate before doing arithmetic so that an empty or malformed value
# yields a clear status instead of a shell arithmetic error.
if [[ ! "$lag" =~ ^-?[0-9]+$ ]]; then
  echo "UNKNOWN - Unexpected replication lag value for redis: ${lag}"
  exit "$STATE_UNKNOWN"
fi
if [[ ! "$master_offset" =~ ^-?[0-9]+$ || ! "$slave_offset" =~ ^-?[0-9]+$ ]]; then
  echo "UNKNOWN - Impossible to read replication offsets for redis"
  exit "$STATE_UNKNOWN"
fi

offset=$(( master_offset - slave_offset ))

# Redis reports -1 here when there is no established link to the master;
# without this check the negative value would pass the "< 5" test as OK.
if (( lag < 0 )); then
  echo "CRITICAL - Replication link for redis is down"
  exit "$STATE_CRITICAL"
fi

output="Replication lag for redis is ${lag}s and offset is ${offset}"

if (( lag < 5 && offset < 5 )); then
  echo "OK - $output"
  exit "$STATE_OK"
elif (( lag < 10 && offset < 10 )); then
  echo "WARNING - $output"
  exit "$STATE_WARNING"
else
  echo "CRITICAL - $output"
  exit "$STATE_CRITICAL"
fi