diff options
author | Ismaël Bouya <ismael.bouya@normalesup.org> | 2019-12-01 18:25:16 +0100 |
---|---|---|
committer | Ismaël Bouya <ismael.bouya@normalesup.org> | 2019-12-01 18:25:16 +0100 |
commit | 9f2025235d888eb4a7822024a5fad2e288388814 (patch) | |
tree | cd9ed388375e5843b373a5975b1e902b61ecaded /modules | |
parent | 0012da0ff3d45df9f68412b90be4f7c24d46a777 (diff) | |
download | Nix-9f2025235d888eb4a7822024a5fad2e288388814.tar.gz Nix-9f2025235d888eb4a7822024a5fad2e288388814.tar.zst Nix-9f2025235d888eb4a7822024a5fad2e288388814.zip |
Add monitoring for backup-2
Diffstat (limited to 'modules')
-rw-r--r-- | modules/private/monitoring/conf/specific_backup-2.cfg | 36 | ||||
-rw-r--r-- | modules/private/monitoring/conf/specific_eldiron.cfg | 29 | ||||
-rw-r--r-- | modules/private/monitoring/default.nix | 114 | ||||
-rwxr-xr-x | modules/private/monitoring/plugins/check_last_file_date | 26 | ||||
-rwxr-xr-x | modules/private/monitoring/plugins/check_postgres_replication | 35 | ||||
-rw-r--r-- | modules/private/system/backup-2.nix | 2 | ||||
-rw-r--r-- | modules/private/system/eldiron.nix | 1 |
7 files changed, 193 insertions, 50 deletions
diff --git a/modules/private/monitoring/conf/specific_backup-2.cfg b/modules/private/monitoring/conf/specific_backup-2.cfg new file mode 100644 index 0000000..ff91322 --- /dev/null +++ b/modules/private/monitoring/conf/specific_backup-2.cfg | |||
@@ -0,0 +1,36 @@ | |||
1 | # vim: filetype=nagios | ||
2 | # Extra service checks for the backup-2 host: free space on /backup2 and freshness of the backup directories below it. | ||
3 | define service { | ||
4 | service_description Size on /backup2 partition | ||
5 | check_command check_local_disk!10%!5%!/backup2 | ||
6 | use local-service | ||
7 | } | ||
8 | # check_last_file_date arguments: $ARG1$ = directory to inspect, $ARG2$ = maximum age in hours, $ARG3$ = user the plugin is run as through sudo. | ||
9 | define command { | ||
10 | command_line /run/wrappers/bin/sudo -u "$ARG3$" $USER2$/check_last_file_date "$ARG1$" "$ARG2$" | ||
11 | command_name check_last_file_date | ||
12 | } | ||
13 | # The three backup directories below are checked as user "backup" with a 14 hour limit. | ||
14 | define service { | ||
15 | service_description Last backup in /backup2/phare is not too old | ||
16 | check_command check_last_file_date!/backup2/phare!14!backup | ||
17 | use local-service | ||
18 | } | ||
19 | |||
20 | define service { | ||
21 | service_description Last backup in /backup2/immae_eu is not too old | ||
22 | check_command check_last_file_date!/backup2/immae_eu!14!backup | ||
23 | use local-service | ||
24 | } | ||
25 | |||
26 | define service { | ||
27 | service_description Last backup in /backup2/immae_fr is not too old | ||
28 | check_command check_last_file_date!/backup2/immae_fr!14!backup | ||
29 | use local-service | ||
30 | } | ||
31 | # The postgresql dump directory is checked as user "postgres" with a 7 hour limit. | ||
32 | define service { | ||
33 | service_description Last postgresql dump in /backup2/eldiron/postgresql_backup is not too old | ||
34 | check_command check_last_file_date!/backup2/eldiron/postgresql_backup!7!postgres | ||
35 | use local-service | ||
36 | } | ||
diff --git a/modules/private/monitoring/conf/specific_eldiron.cfg b/modules/private/monitoring/conf/specific_eldiron.cfg new file mode 100644 index 0000000..fd5a43d --- /dev/null +++ b/modules/private/monitoring/conf/specific_eldiron.cfg | |||
@@ -0,0 +1,29 @@ | |||
1 | # vim: filetype=nagios | ||
2 | # Extra checks for the eldiron host: postgresql replication lag per replication user, and the mail queue. | ||
3 | define command { | ||
4 | command_line /run/wrappers/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$" | ||
5 | command_name check_postgresql_replication | ||
6 | } | ||
7 | # check_postgresql_replication arguments: $ARG1$ = replication user name, $ARG2$ = value given to psql -h, $ARG3$ = port given to psql -p. | ||
8 | define service { | ||
9 | service_description Postgresql replication for backup-1 is up to date | ||
10 | check_command check_postgresql_replication!backup-1!/run/postgresql!5432 | ||
11 | use local-service | ||
12 | } | ||
13 | |||
14 | define service { | ||
15 | service_description Postgresql replication for backup-2 is up to date | ||
16 | check_command check_postgresql_replication!backup-2!/run/postgresql!5432 | ||
17 | use local-service | ||
18 | } | ||
19 | |||
20 | define service { | ||
21 | service_description mailq is empty | ||
22 | use local-service | ||
23 | check_command check_mailq | ||
24 | } | ||
25 | # NOTE(review): -w 1 / -c 2 are presumably the warning and critical queue-size thresholds; confirm against the check_mailq plugin documentation. | ||
26 | define command { | ||
27 | command_name check_mailq | ||
28 | command_line $USER1$/check_mailq -s -w 1 -c 2 | ||
29 | } | ||
diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix index c5acd40..6062aba 100644 --- a/modules/private/monitoring/default.nix +++ b/modules/private/monitoring/default.nix | |||
@@ -1,4 +1,4 @@ | |||
1 | { config, myconfig, pkgs, lib, ... }: | 1 | { config, myconfig, pkgs, lib, name, hostFQDN, ... }: |
2 | let | 2 | let |
3 | myplugins = pkgs.runCommand "buildplugins" { | 3 | myplugins = pkgs.runCommand "buildplugins" { |
4 | buildInputs = [ pkgs.makeWrapper pkgs.perl ]; | 4 | buildInputs = [ pkgs.makeWrapper pkgs.perl ]; |
@@ -13,16 +13,57 @@ let | |||
13 | wrapProgram $out/check_mem.sh --prefix PATH : ${lib.makeBinPath [ | 13 | wrapProgram $out/check_mem.sh --prefix PATH : ${lib.makeBinPath [ |
14 | pkgs.gnugrep pkgs.gawk pkgs.procps-ng | 14 | pkgs.gnugrep pkgs.gawk pkgs.procps-ng |
15 | ]} | 15 | ]} |
16 | wrapProgram $out/check_postgres_replication --prefix PATH : ${lib.makeBinPath [ | ||
17 | pkgs.postgresql | ||
18 | ]} | ||
16 | ''; | 19 | ''; |
20 | defaultObjects = | ||
21 | let specific_file = ./conf + "/specific_" + name + ".cfg"; | ||
22 | in | ||
23 | builtins.readFile ./conf/local_services.cfg | ||
24 | + builtins.readFile ./conf/timeperiods.cfg | ||
25 | + builtins.readFile ./conf/services.cfg | ||
26 | + builtins.readFile ./conf/contacts.cfg | ||
27 | + builtins.readFile ./conf/hosts.cfg | ||
28 | + '' | ||
29 | define command { | ||
30 | command_line ${myplugins}/send_nrdp.sh -u "$USER200$" -t "$USER201$" -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$" | ||
31 | command_name notify-master | ||
32 | } | ||
33 | define service { | ||
34 | service_description No mdadm array is degraded | ||
35 | use local-service | ||
36 | check_command check_command_output!${pkgs.mdadm}/bin/mdadm --monitor --scan -1!^$!-s 0 -r root | ||
37 | } | ||
38 | |||
39 | define service { | ||
40 | name local-service | ||
41 | use generic-service | ||
42 | host_name ${hostFQDN} | ||
43 | check_interval 5 | ||
44 | max_check_attempts 4 | ||
45 | register 0 | ||
46 | retry_interval 1 | ||
47 | } | ||
48 | define host { | ||
49 | host_name ${hostFQDN} | ||
50 | alias ${hostFQDN} | ||
51 | address ${hostFQDN} | ||
52 | use linux-server | ||
53 | } | ||
54 | '' | ||
55 | + lib.strings.optionalString (builtins.pathExists specific_file) (builtins.readFile specific_file); | ||
17 | in | 56 | in |
18 | { | 57 | { |
19 | options = { | 58 | options = { |
20 | myServices.monitoring.enable = lib.mkOption { | 59 | myServices.monitoring = { |
21 | type = lib.types.bool; | 60 | enable = lib.mkOption { |
22 | default = false; | 61 | type = lib.types.bool; |
23 | description = '' | 62 | default = false; |
24 | Whether to enable monitoring. | 63 | description = '' |
25 | ''; | 64 | Whether to enable monitoring. |
65 | ''; | ||
66 | }; | ||
26 | }; | 67 | }; |
27 | }; | 68 | }; |
28 | 69 | ||
@@ -39,6 +80,21 @@ in | |||
39 | users = [ "naemon" ]; | 80 | users = [ "naemon" ]; |
40 | runAs = "root"; | 81 | runAs = "root"; |
41 | } | 82 | } |
83 | { | ||
84 | commands = [ | ||
85 | { command = "${myplugins}/check_postgres_replication *"; options = [ "NOPASSWD" ]; } | ||
86 | { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; } | ||
87 | ]; | ||
88 | users = [ "naemon" ]; | ||
89 | runAs = "postgres"; | ||
90 | } | ||
91 | { | ||
92 | commands = [ | ||
93 | { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; } | ||
94 | ]; | ||
95 | users = [ "naemon" ]; | ||
96 | runAs = "backup"; | ||
97 | } | ||
42 | ]; | 98 | ]; |
43 | environment.etc."mdadm.conf" = { | 99 | environment.etc."mdadm.conf" = { |
44 | enable = true; | 100 | enable = true; |
@@ -66,49 +122,7 @@ in | |||
66 | $USER200$=${myconfig.env.monitoring.status_url} | 122 | $USER200$=${myconfig.env.monitoring.status_url} |
67 | $USER201$=${myconfig.env.monitoring.status_token} | 123 | $USER201$=${myconfig.env.monitoring.status_token} |
68 | ''; | 124 | ''; |
69 | objectDefs = builtins.readFile ./conf/local_services.cfg | 125 | objectDefs = defaultObjects; |
70 | + builtins.readFile ./conf/timeperiods.cfg | ||
71 | + builtins.readFile ./conf/services.cfg | ||
72 | + builtins.readFile ./conf/contacts.cfg | ||
73 | + builtins.readFile ./conf/hosts.cfg | ||
74 | + '' | ||
75 | define command { | ||
76 | command_line ${myplugins}/send_nrdp.sh -u "$USER200$" -t "$USER201$" -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$" | ||
77 | command_name notify-master | ||
78 | } | ||
79 | define service { | ||
80 | service_description No mdadm array is degraded | ||
81 | use local-service | ||
82 | check_command check_command_output!${pkgs.mdadm}/bin/mdadm --monitor --scan -1!^$!-s 0 -r root | ||
83 | } | ||
84 | |||
85 | define service { | ||
86 | service_description mailq is empty | ||
87 | use local-service | ||
88 | check_command check_mailq | ||
89 | } | ||
90 | |||
91 | define command { | ||
92 | command_name check_mailq | ||
93 | command_line $USER1$/check_mailq -s -w 1 -c 2 | ||
94 | } | ||
95 | |||
96 | define service { | ||
97 | name local-service | ||
98 | use generic-service | ||
99 | host_name eldiron.immae.eu | ||
100 | check_interval 5 | ||
101 | max_check_attempts 4 | ||
102 | register 0 | ||
103 | retry_interval 1 | ||
104 | } | ||
105 | define host { | ||
106 | host_name eldiron.immae.eu | ||
107 | alias eldiron.immae.eu | ||
108 | address eldiron.immae.eu | ||
109 | use linux-server | ||
110 | } | ||
111 | ''; | ||
112 | }; | 126 | }; |
113 | }; | 127 | }; |
114 | } | 128 | } |
diff --git a/modules/private/monitoring/plugins/check_last_file_date b/modules/private/monitoring/plugins/check_last_file_date new file mode 100755 index 0000000..df45bbc --- /dev/null +++ b/modules/private/monitoring/plugins/check_last_file_date | |||
@@ -0,0 +1,26 @@ | |||
1 | #!/bin/bash | ||
2 | # Usage: check_last_file_date <directory> <max_age_hours>. Exits OK when the newest direct entry of <directory> is younger than <max_age_hours>, CRITICAL when it is older, UNKNOWN when nothing could be listed. | ||
3 | STATE_OK=0 | ||
4 | STATE_WARNING=1 | ||
5 | STATE_CRITICAL=2 | ||
6 | STATE_UNKNOWN=3 | ||
7 | # $1: directory to inspect (direct children only); $2: maximum accepted age in hours. | ||
8 | base_path=$1 | ||
9 | hours=$2 | ||
10 | # Newest modification time (epoch seconds) among the direct children. The path is quoted so that spaces or glob characters cannot split it, and the sort is numeric (C locale) instead of lexical. | ||
11 | last_date=$(find "$base_path" -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | LC_ALL=C sort -n | tail -n 1) | ||
12 | # find errors are discarded above, so an empty result covers a missing, unreadable or empty directory. | ||
13 | if [ -z "$last_date" ]; then | ||
14 | echo "UNKNOWN: Could not read folder" | ||
15 | exit $STATE_UNKNOWN | ||
16 | else | ||
17 | LC_ALL=C last_date=$(printf "%.*f" 0 "$last_date") | ||
18 | min_date=$(date -d "$hours hours ago" "+%s") | ||
19 | if [ "$min_date" -lt "$last_date" ]; then | ||
20 | echo "OK: Last file $(date -d "@$last_date")" | ||
21 | exit $STATE_OK | ||
22 | else | ||
23 | echo "CRITICAL: Last file $(date -d "@$last_date")" | ||
24 | exit $STATE_CRITICAL | ||
25 | fi | ||
26 | fi | ||
diff --git a/modules/private/monitoring/plugins/check_postgres_replication b/modules/private/monitoring/plugins/check_postgres_replication new file mode 100755 index 0000000..009b4d5 --- /dev/null +++ b/modules/private/monitoring/plugins/check_postgres_replication | |||
@@ -0,0 +1,35 @@ | |||
1 | #!/bin/bash | ||
2 | # Usage: check_postgres_replication <replication_user> <psql_host> <psql_port>. Reports the replay lag recorded in pg_stat_replication for that user: OK below 5s, WARNING below 10s, CRITICAL otherwise, UNKNOWN when psql fails or no row matches. | ||
3 | STATE_OK=0 | ||
4 | STATE_WARNING=1 | ||
5 | STATE_CRITICAL=2 | ||
6 | STATE_UNKNOWN=3 | ||
7 | # $1: usename to look up in pg_stat_replication; $2 and $3: values handed to psql -h and -p. | ||
8 | user=$1 | ||
9 | host=$2 | ||
10 | port=$3 | ||
11 | # The user name is bound as a psql variable (:'usename') instead of being pasted into the SQL text; the query is fed on stdin because psql -c performs no variable interpolation. ON_ERROR_STOP makes a failed query return a non-zero status, and MAX() yields a single value even when several connections share the user. | ||
12 | lag=$(psql -h "$host" -p "$port" -A -t -v ON_ERROR_STOP=1 -v usename="$user" 2>/dev/null <<<"SELECT MAX(COALESCE(EXTRACT(EPOCH FROM replay_lag),0)) FROM pg_stat_replication WHERE usename=:'usename';") | ||
13 | exit_code=$? | ||
14 | # Non-zero status: psql could not connect or the query failed. Empty output: no replication row for this user. | ||
15 | if [[ $exit_code -ne 0 ]]; then | ||
16 | echo "UNKNOWN - Impossible to run psql command" | ||
17 | exit $STATE_UNKNOWN | ||
18 | elif [[ -z "$lag" ]]; then | ||
19 | echo "UNKNOWN - No replication found for $user" | ||
20 | exit $STATE_UNKNOWN | ||
21 | else | ||
22 | output="Replication lag for $user is ${lag}s" | ||
23 | LC_ALL=C lag=$(printf "%.*f" 0 "$lag") | ||
24 | # lag is now rounded to whole seconds so that the integer comparisons below are valid. | ||
25 | if [[ $lag -lt 5 ]]; then | ||
26 | echo "OK - $output" | ||
27 | exit $STATE_OK | ||
28 | elif [[ $lag -lt 10 ]]; then | ||
29 | echo "WARNING - $output" | ||
30 | exit $STATE_WARNING | ||
31 | else | ||
32 | echo "CRITICAL - $output" | ||
33 | exit $STATE_CRITICAL | ||
34 | fi | ||
35 | fi | ||
diff --git a/modules/private/system/backup-2.nix b/modules/private/system/backup-2.nix index 1c5b7d8..3120a57 100644 --- a/modules/private/system/backup-2.nix +++ b/modules/private/system/backup-2.nix | |||
@@ -3,6 +3,7 @@ | |||
3 | { | 3 | { |
4 | boot.kernelPackages = pkgs.linuxPackages_latest; | 4 | boot.kernelPackages = pkgs.linuxPackages_latest; |
5 | _module.args.privateFiles = privateFiles; | 5 | _module.args.privateFiles = privateFiles; |
6 | _module.args.hostFQDN = "backup-2.v.immae.eu"; | ||
6 | imports = builtins.attrValues (import ../..); | 7 | imports = builtins.attrValues (import ../..); |
7 | 8 | ||
8 | deployment = { | 9 | deployment = { |
@@ -48,6 +49,7 @@ | |||
48 | ssh_key_private = myconfig.env.rsync_backup.ssh_key.private; | 49 | ssh_key_private = myconfig.env.rsync_backup.ssh_key.private; |
49 | }; | 50 | }; |
50 | 51 | ||
52 | myServices.monitoring.enable = true; | ||
51 | myServices.databasesReplication = { | 53 | myServices.databasesReplication = { |
52 | postgresql = { | 54 | postgresql = { |
53 | enable = true; | 55 | enable = true; |
diff --git a/modules/private/system/eldiron.nix b/modules/private/system/eldiron.nix index 5384bc2..d79cf16 100644 --- a/modules/private/system/eldiron.nix +++ b/modules/private/system/eldiron.nix | |||
@@ -3,6 +3,7 @@ | |||
3 | { | 3 | { |
4 | boot.kernelPackages = pkgs.linuxPackages_latest; | 4 | boot.kernelPackages = pkgs.linuxPackages_latest; |
5 | _module.args.privateFiles = privateFiles; | 5 | _module.args.privateFiles = privateFiles; |
6 | _module.args.hostFQDN = "eldiron.immae.eu"; | ||
6 | 7 | ||
7 | networking = { | 8 | networking = { |
8 | firewall.enable = true; | 9 | firewall.enable = true; |