aboutsummaryrefslogtreecommitdiff
path: root/modules
diff options
context:
space:
mode:
authorIsmaël Bouya <ismael.bouya@normalesup.org>2019-12-01 18:25:16 +0100
committerIsmaël Bouya <ismael.bouya@normalesup.org>2019-12-01 18:25:16 +0100
commit9f2025235d888eb4a7822024a5fad2e288388814 (patch)
treecd9ed388375e5843b373a5975b1e902b61ecaded /modules
parent0012da0ff3d45df9f68412b90be4f7c24d46a777 (diff)
downloadNix-9f2025235d888eb4a7822024a5fad2e288388814.tar.gz
Nix-9f2025235d888eb4a7822024a5fad2e288388814.tar.zst
Nix-9f2025235d888eb4a7822024a5fad2e288388814.zip
Add monitoring for backup-2
Diffstat (limited to 'modules')
-rw-r--r--modules/private/monitoring/conf/specific_backup-2.cfg36
-rw-r--r--modules/private/monitoring/conf/specific_eldiron.cfg29
-rw-r--r--modules/private/monitoring/default.nix114
-rwxr-xr-xmodules/private/monitoring/plugins/check_last_file_date26
-rwxr-xr-xmodules/private/monitoring/plugins/check_postgres_replication35
-rw-r--r--modules/private/system/backup-2.nix2
-rw-r--r--modules/private/system/eldiron.nix1
7 files changed, 193 insertions, 50 deletions
diff --git a/modules/private/monitoring/conf/specific_backup-2.cfg b/modules/private/monitoring/conf/specific_backup-2.cfg
new file mode 100644
index 0000000..ff91322
--- /dev/null
+++ b/modules/private/monitoring/conf/specific_backup-2.cfg
@@ -0,0 +1,36 @@
# Host-specific monitoring objects for backup-2: free space on /backup2 and
# freshness of the backups stored below it.
1# vim: filetype=nagios
2
# Disk usage check for the /backup2 partition.
# NOTE(review): 10% / 5% are presumably the warning / critical thresholds of
# check_local_disk, which is defined outside this file - confirm.
3define service {
4 service_description Size on /backup2 partition
5 check_command check_local_disk!10%!5%!/backup2
6 use local-service
7}
8
# Runs the check_last_file_date plugin through sudo as the user given in
# $ARG3$. $ARG1$ and $ARG2$ are forwarded as the plugin's first and second
# arguments, which the plugin reads as the directory to inspect and the
# maximum age in hours.
9define command {
10 command_line /run/wrappers/bin/sudo -u "$ARG3$" $USER2$/check_last_file_date "$ARG1$" "$ARG2$"
11 command_name check_last_file_date
12}
13
# The three services below check, as user "backup", that the newest entry of
# each directory is less than 14 hours old.
14define service {
15 service_description Last backup in /backup2/phare is not too old
16 check_command check_last_file_date!/backup2/phare!14!backup
17 use local-service
18}
19
20define service {
21 service_description Last backup in /backup2/immae_eu is not too old
22 check_command check_last_file_date!/backup2/immae_eu!14!backup
23 use local-service
24}
25
26define service {
27 service_description Last backup in /backup2/immae_fr is not too old
28 check_command check_last_file_date!/backup2/immae_fr!14!backup
29 use local-service
30}
31
# Same check for the PostgreSQL dumps, run as user "postgres" with a 7 hour
# limit.
32define service {
33 service_description Last postgresql dump in /backup2/eldiron/postgresql_backup is not too old
34 check_command check_last_file_date!/backup2/eldiron/postgresql_backup!7!postgres
35 use local-service
36}
diff --git a/modules/private/monitoring/conf/specific_eldiron.cfg b/modules/private/monitoring/conf/specific_eldiron.cfg
new file mode 100644
index 0000000..fd5a43d
--- /dev/null
+++ b/modules/private/monitoring/conf/specific_eldiron.cfg
@@ -0,0 +1,29 @@
# Host-specific monitoring objects for eldiron: PostgreSQL replication lag
# towards the backup hosts and mail queue size.
1# vim: filetype=nagios
2#
# Runs the check_postgres_replication plugin through sudo as user "postgres".
# The plugin reads its three arguments as the replication user name, the
# host passed to `psql -h`, and the port passed to `psql -p`.
3define command {
4 command_line /run/wrappers/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$"
5 command_name check_postgresql_replication
6}
7
# One replication check per replication user (backup-1, backup-2), both
# connecting via /run/postgresql on port 5432.
8define service {
9 service_description Postgresql replication for backup-1 is up to date
10 check_command check_postgresql_replication!backup-1!/run/postgresql!5432
11 use local-service
12}
13
14define service {
15 service_description Postgresql replication for backup-2 is up to date
16 check_command check_postgresql_replication!backup-2!/run/postgresql!5432
17 use local-service
18}
19
# Mail queue check, using the check_mailq command defined just below.
20define service {
21 service_description mailq is empty
22 use local-service
23 check_command check_mailq
24}
25
# NOTE(review): "-w 1 -c 2" presumably are the warning / critical queue
# sizes and "-s" a sudo switch of the stock check_mailq plugin - confirm
# against the plugin's documentation.
26define command {
27 command_name check_mailq
28 command_line $USER1$/check_mailq -s -w 1 -c 2
29}
diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix
index c5acd40..6062aba 100644
--- a/modules/private/monitoring/default.nix
+++ b/modules/private/monitoring/default.nix
@@ -1,4 +1,4 @@
1{ config, myconfig, pkgs, lib, ... }: 1{ config, myconfig, pkgs, lib, name, hostFQDN, ... }:
2let 2let
3 myplugins = pkgs.runCommand "buildplugins" { 3 myplugins = pkgs.runCommand "buildplugins" {
4 buildInputs = [ pkgs.makeWrapper pkgs.perl ]; 4 buildInputs = [ pkgs.makeWrapper pkgs.perl ];
@@ -13,16 +13,57 @@ let
13 wrapProgram $out/check_mem.sh --prefix PATH : ${lib.makeBinPath [ 13 wrapProgram $out/check_mem.sh --prefix PATH : ${lib.makeBinPath [
14 pkgs.gnugrep pkgs.gawk pkgs.procps-ng 14 pkgs.gnugrep pkgs.gawk pkgs.procps-ng
15 ]} 15 ]}
16 wrapProgram $out/check_postgres_replication --prefix PATH : ${lib.makeBinPath [
17 pkgs.postgresql
18 ]}
16 ''; 19 '';
20 defaultObjects =
21 let specific_file = ./conf + "/specific_" + name + ".cfg";
22 in
23 builtins.readFile ./conf/local_services.cfg
24 + builtins.readFile ./conf/timeperiods.cfg
25 + builtins.readFile ./conf/services.cfg
26 + builtins.readFile ./conf/contacts.cfg
27 + builtins.readFile ./conf/hosts.cfg
28 + ''
29 define command {
30 command_line ${myplugins}/send_nrdp.sh -u "$USER200$" -t "$USER201$" -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$"
31 command_name notify-master
32 }
33 define service {
34 service_description No mdadm array is degraded
35 use local-service
36 check_command check_command_output!${pkgs.mdadm}/bin/mdadm --monitor --scan -1!^$!-s 0 -r root
37 }
38
39 define service {
40 name local-service
41 use generic-service
42 host_name ${hostFQDN}
43 check_interval 5
44 max_check_attempts 4
45 register 0
46 retry_interval 1
47 }
48 define host {
49 host_name ${hostFQDN}
50 alias ${hostFQDN}
51 address ${hostFQDN}
52 use linux-server
53 }
54 ''
55 + lib.strings.optionalString (builtins.pathExists specific_file) (builtins.readFile specific_file);
17in 56in
18{ 57{
19 options = { 58 options = {
20 myServices.monitoring.enable = lib.mkOption { 59 myServices.monitoring = {
21 type = lib.types.bool; 60 enable = lib.mkOption {
22 default = false; 61 type = lib.types.bool;
23 description = '' 62 default = false;
24 Whether to enable monitoring. 63 description = ''
25 ''; 64 Whether to enable monitoring.
65 '';
66 };
26 }; 67 };
27 }; 68 };
28 69
@@ -39,6 +80,21 @@ in
39 users = [ "naemon" ]; 80 users = [ "naemon" ];
40 runAs = "root"; 81 runAs = "root";
41 } 82 }
83 {
84 commands = [
85 { command = "${myplugins}/check_postgres_replication *"; options = [ "NOPASSWD" ]; }
86 { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
87 ];
88 users = [ "naemon" ];
89 runAs = "postgres";
90 }
91 {
92 commands = [
93 { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
94 ];
95 users = [ "naemon" ];
96 runAs = "backup";
97 }
42 ]; 98 ];
43 environment.etc."mdadm.conf" = { 99 environment.etc."mdadm.conf" = {
44 enable = true; 100 enable = true;
@@ -66,49 +122,7 @@ in
66 $USER200$=${myconfig.env.monitoring.status_url} 122 $USER200$=${myconfig.env.monitoring.status_url}
67 $USER201$=${myconfig.env.monitoring.status_token} 123 $USER201$=${myconfig.env.monitoring.status_token}
68 ''; 124 '';
69 objectDefs = builtins.readFile ./conf/local_services.cfg 125 objectDefs = defaultObjects;
70 + builtins.readFile ./conf/timeperiods.cfg
71 + builtins.readFile ./conf/services.cfg
72 + builtins.readFile ./conf/contacts.cfg
73 + builtins.readFile ./conf/hosts.cfg
74 + ''
75 define command {
76 command_line ${myplugins}/send_nrdp.sh -u "$USER200$" -t "$USER201$" -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$"
77 command_name notify-master
78 }
79 define service {
80 service_description No mdadm array is degraded
81 use local-service
82 check_command check_command_output!${pkgs.mdadm}/bin/mdadm --monitor --scan -1!^$!-s 0 -r root
83 }
84
85 define service {
86 service_description mailq is empty
87 use local-service
88 check_command check_mailq
89 }
90
91 define command {
92 command_name check_mailq
93 command_line $USER1$/check_mailq -s -w 1 -c 2
94 }
95
96 define service {
97 name local-service
98 use generic-service
99 host_name eldiron.immae.eu
100 check_interval 5
101 max_check_attempts 4
102 register 0
103 retry_interval 1
104 }
105 define host {
106 host_name eldiron.immae.eu
107 alias eldiron.immae.eu
108 address eldiron.immae.eu
109 use linux-server
110 }
111 '';
112 }; 126 };
113 }; 127 };
114} 128}
diff --git a/modules/private/monitoring/plugins/check_last_file_date b/modules/private/monitoring/plugins/check_last_file_date
new file mode 100755
index 0000000..df45bbc
--- /dev/null
+++ b/modules/private/monitoring/plugins/check_last_file_date
@@ -0,0 +1,26 @@
#!/bin/bash
#
# Nagios/Naemon plugin: verify that the most recently modified entry located
# directly inside a directory is newer than a given number of hours.
#
# Usage: check_last_file_date <base_path> <hours>
#   base_path  directory whose direct children are inspected
#   hours      maximum accepted age (in hours) of the newest child
#
# Exit status:
#   0 (OK)        the newest entry is more recent than <hours> hours ago
#   2 (CRITICAL)  the newest entry is older than that
#   3 (UNKNOWN)   bad arguments, or the directory is unreadable or empty

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

base_path=$1
hours=$2

if [ -z "$base_path" ] || [ -z "$hours" ]; then
  echo "UNKNOWN: Usage: $0 <base_path> <hours>"
  exit $STATE_UNKNOWN
fi

# Oldest acceptable modification time, as seconds since the epoch. A value of
# <hours> that date cannot parse must yield UNKNOWN rather than fall through
# to the CRITICAL branch of the comparison below.
if ! min_date=$(date -d "$hours hours ago" "+%s" 2>/dev/null) || [ -z "$min_date" ]; then
  echo "UNKNOWN: Invalid number of hours: $hours"
  exit $STATE_UNKNOWN
fi

# %T@ prints the modification time as seconds since the epoch with a
# fractional part; sort numerically (in the C locale so that "." is the
# decimal separator) and keep the highest value.
last_date=$(find "$base_path" -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | LC_ALL=C sort -n | tail -n 1)

if [ -z "$last_date" ]; then
  echo "UNKNOWN: Could not read folder"
  exit $STATE_UNKNOWN
fi

# Drop the fractional part so that the value can be used in integer tests and
# passed to date.
last_date=${last_date%.*}

if [ "$min_date" -lt "$last_date" ]; then
  echo "OK: Last file $(date -d "@$last_date")"
  exit $STATE_OK
else
  echo "CRITICAL: Last file $(date -d "@$last_date")"
  exit $STATE_CRITICAL
fi
diff --git a/modules/private/monitoring/plugins/check_postgres_replication b/modules/private/monitoring/plugins/check_postgres_replication
new file mode 100755
index 0000000..009b4d5
--- /dev/null
+++ b/modules/private/monitoring/plugins/check_postgres_replication
@@ -0,0 +1,35 @@
#!/bin/bash
#
# Nagios/Naemon plugin: report the replay lag of a PostgreSQL streaming
# replication client, as seen in pg_stat_replication on the primary.
#
# Usage: check_postgres_replication <user> <host> <port> [warning] [critical]
#   user      replication user name (pg_stat_replication.usename)
#   host      value given to `psql -h` (host name or socket directory)
#   port      value given to `psql -p`
#   warning   lag in seconds from which WARNING is reported (default: 5)
#   critical  lag in seconds from which CRITICAL is reported (default: 10)
#
# Exit status:
#   0 (OK)        lag below the warning threshold
#   1 (WARNING)   lag below the critical threshold
#   2 (CRITICAL)  lag at or above the critical threshold
#   3 (UNKNOWN)   bad arguments, psql failure, or no replication row found

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

user=$1
host=$2
port=$3
warning=${4:-5}
critical=${5:-10}

if [[ -z "$user" || -z "$host" || -z "$port" ]]; then
  echo "UNKNOWN - Usage: $0 <user> <host> <port> [warning] [critical]"
  exit $STATE_UNKNOWN
fi

if ! [[ "$warning" =~ ^[0-9]+$ && "$critical" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN - Thresholds must be integers"
  exit $STATE_UNKNOWN
fi

# The user name is handed to psql as a variable and quoted by psql itself
# (:'user') instead of being spliced into the SQL text. psql does not expand
# variables in -c commands, hence the query is fed on standard input.
# ON_ERROR_STOP makes psql return a non-zero status on SQL errors.
# MAX() collapses several replication connections of the same user into the
# worst lag; with no matching row it yields NULL, printed as an empty string.
lag=$(psql -X -q -A -t -h "$host" -p "$port" -v ON_ERROR_STOP=1 -v "user=$user" 2>/dev/null <<'SQL'
SELECT MAX(COALESCE(EXTRACT(EPOCH FROM replay_lag), 0))
  FROM pg_stat_replication
 WHERE usename = :'user';
SQL
)
exit_code=$?

if [[ $exit_code -ne 0 ]]; then
  echo "UNKNOWN - Impossible to run psql command"
  exit $STATE_UNKNOWN
elif [[ -z "$lag" ]]; then
  echo "UNKNOWN - No replication found for $user"
  exit $STATE_UNKNOWN
fi

output="Replication lag for $user is ${lag}s"

# Round to whole seconds for the integer comparisons; the C locale is set
# only inside the command substitution so that "." is the decimal separator.
if ! lag=$(LC_ALL=C; printf "%.0f" "$lag" 2>/dev/null); then
  echo "UNKNOWN - Unexpected psql output"
  exit $STATE_UNKNOWN
fi

if [[ $lag -lt $warning ]]; then
  echo "OK - $output"
  exit $STATE_OK
elif [[ $lag -lt $critical ]]; then
  echo "WARNING - $output"
  exit $STATE_WARNING
else
  echo "CRITICAL - $output"
  exit $STATE_CRITICAL
fi
diff --git a/modules/private/system/backup-2.nix b/modules/private/system/backup-2.nix
index 1c5b7d8..3120a57 100644
--- a/modules/private/system/backup-2.nix
+++ b/modules/private/system/backup-2.nix
@@ -3,6 +3,7 @@
3{ 3{
4 boot.kernelPackages = pkgs.linuxPackages_latest; 4 boot.kernelPackages = pkgs.linuxPackages_latest;
5 _module.args.privateFiles = privateFiles; 5 _module.args.privateFiles = privateFiles;
6 _module.args.hostFQDN = "backup-2.v.immae.eu";
6 imports = builtins.attrValues (import ../..); 7 imports = builtins.attrValues (import ../..);
7 8
8 deployment = { 9 deployment = {
@@ -48,6 +49,7 @@
48 ssh_key_private = myconfig.env.rsync_backup.ssh_key.private; 49 ssh_key_private = myconfig.env.rsync_backup.ssh_key.private;
49 }; 50 };
50 51
52 myServices.monitoring.enable = true;
51 myServices.databasesReplication = { 53 myServices.databasesReplication = {
52 postgresql = { 54 postgresql = {
53 enable = true; 55 enable = true;
diff --git a/modules/private/system/eldiron.nix b/modules/private/system/eldiron.nix
index 5384bc2..d79cf16 100644
--- a/modules/private/system/eldiron.nix
+++ b/modules/private/system/eldiron.nix
@@ -3,6 +3,7 @@
3{ 3{
4 boot.kernelPackages = pkgs.linuxPackages_latest; 4 boot.kernelPackages = pkgs.linuxPackages_latest;
5 _module.args.privateFiles = privateFiles; 5 _module.args.privateFiles = privateFiles;
6 _module.args.hostFQDN = "eldiron.immae.eu";
6 7
7 networking = { 8 networking = {
8 firewall.enable = true; 9 firewall.enable = true;