--- /dev/null
+# vim: filetype=nagios
+
+# Disk-space check on the /backup2 partition, delegated to check_local_disk.
+# NOTE(review): 10% / 5% are presumably the warning / critical thresholds;
+# confirm against the check_local_disk command definition (not visible here).
+define service {
+ service_description Size on /backup2 partition
+ check_command check_local_disk!10%!5%!/backup2
+ use local-service
+}
+
+# Runs $USER2$/check_last_file_date through sudo as the user named in $ARG3$,
+# passing $ARG1$ and $ARG2$ to the plugin in that order.
+# The services using this command pass a directory under /backup2 as ARG1 and
+# a number as ARG2.
+# NOTE(review): ARG2 is presumably a maximum age in days; confirm against the
+# plugin itself.
+define command {
+ command_line /run/wrappers/bin/sudo -u "$ARG3$" $USER2$/check_last_file_date "$ARG1$" "$ARG2$"
+ command_name check_last_file_date
+}
+
+# Freshness check of /backup2/phare: check_last_file_date is run as user
+# "backup" with 14 as its second argument.
+define service {
+ service_description Last backup in /backup2/phare is not too old
+ check_command check_last_file_date!/backup2/phare!14!backup
+ use local-service
+}
+
+# Freshness check of /backup2/immae_eu: check_last_file_date is run as user
+# "backup" with 14 as its second argument.
+define service {
+ service_description Last backup in /backup2/immae_eu is not too old
+ check_command check_last_file_date!/backup2/immae_eu!14!backup
+ use local-service
+}
+
+# Freshness check of /backup2/immae_fr: check_last_file_date is run as user
+# "backup" with 14 as its second argument.
+define service {
+ service_description Last backup in /backup2/immae_fr is not too old
+ check_command check_last_file_date!/backup2/immae_fr!14!backup
+ use local-service
+}
+
+# Freshness check of the PostgreSQL dumps in /backup2/eldiron/postgresql_backup.
+# Unlike the other backup checks, it runs as user "postgres" and passes 7 as
+# the second argument.
+define service {
+ service_description Last postgresql dump in /backup2/eldiron/postgresql_backup is not too old
+ check_command check_last_file_date!/backup2/eldiron/postgresql_backup!7!postgres
+ use local-service
+}
--- /dev/null
+# vim: filetype=nagios
+#
+# Runs $USER2$/check_postgres_replication through sudo as the postgres user.
+# $ARG1$, $ARG2$ and $ARG3$ are forwarded in order; the plugin script reads
+# them as replication user, psql host and psql port.
+# Note the naming: the command is check_postgresql_replication while the
+# script file is check_postgres_replication.
+define command {
+ command_line /run/wrappers/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$"
+ command_name check_postgresql_replication
+}
+
+# Replication check for the replication user "backup-1", with /run/postgresql
+# as the psql host (presumably the Unix socket directory) and port 5432.
+define service {
+ service_description Postgresql replication for backup-1 is up to date
+ check_command check_postgresql_replication!backup-1!/run/postgresql!5432
+ use local-service
+}
+
+# Replication check for the replication user "backup-2", with /run/postgresql
+# as the psql host (presumably the Unix socket directory) and port 5432.
+define service {
+ service_description Postgresql replication for backup-2 is up to date
+ check_command check_postgresql_replication!backup-2!/run/postgresql!5432
+ use local-service
+}
+
+# Mail queue check; uses the check_mailq command defined in this file.
+define service {
+ service_description mailq is empty
+ use local-service
+ check_command check_mailq
+}
+
+# Runs the check_mailq plugin from $USER1$ with the options -s -w 1 -c 2.
+# NOTE(review): presumably -w 1 / -c 2 mean warning at 1 queued message and
+# critical at 2, and -s makes the plugin use sudo; confirm against the
+# plugin's --help output.
+define command {
+ command_name check_mailq
+ command_line $USER1$/check_mailq -s -w 1 -c 2
+}
-{ config, myconfig, pkgs, lib, ... }:
+{ config, myconfig, pkgs, lib, name, hostFQDN, ... }:
let
myplugins = pkgs.runCommand "buildplugins" {
buildInputs = [ pkgs.makeWrapper pkgs.perl ];
wrapProgram $out/check_mem.sh --prefix PATH : ${lib.makeBinPath [
pkgs.gnugrep pkgs.gawk pkgs.procps-ng
]}
+ wrapProgram $out/check_postgres_replication --prefix PATH : ${lib.makeBinPath [
+ pkgs.postgresql
+ ]}
'';
+ # Object definitions handed to the monitoring service as `objectDefs`.
+ # Built by concatenating, in order:
+ #   1. the static files under ./conf (local_services, timeperiods, services,
+ #      contacts, hosts);
+ #   2. inline definitions: the notify-master command, the mdadm service, the
+ #      `local-service` template and the host entry, the last two of which
+ #      are parameterised by hostFQDN;
+ #   3. ./conf/specific_<name>.cfg, only when that file exists.
+ defaultObjects =
+ # Path of the optional per-`name` configuration file.
+ let specific_file = ./conf + "/specific_" + name + ".cfg";
+ in
+ builtins.readFile ./conf/local_services.cfg
+ + builtins.readFile ./conf/timeperiods.cfg
+ + builtins.readFile ./conf/services.cfg
+ + builtins.readFile ./conf/contacts.cfg
+ + builtins.readFile ./conf/hosts.cfg
+ + ''
+ define command {
+ command_line ${myplugins}/send_nrdp.sh -u "$USER200$" -t "$USER201$" -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$"
+ command_name notify-master
+ }
+ define service {
+ service_description No mdadm array is degraded
+ use local-service
+ check_command check_command_output!${pkgs.mdadm}/bin/mdadm --monitor --scan -1!^$!-s 0 -r root
+ }
+
+ define service {
+ name local-service
+ use generic-service
+ host_name ${hostFQDN}
+ check_interval 5
+ max_check_attempts 4
+ register 0
+ retry_interval 1
+ }
+ define host {
+ host_name ${hostFQDN}
+ alias ${hostFQDN}
+ address ${hostFQDN}
+ use linux-server
+ }
+ ''
+ + lib.strings.optionalString (builtins.pathExists specific_file) (builtins.readFile specific_file);
in
{
options = {
- myServices.monitoring.enable = lib.mkOption {
- type = lib.types.bool;
- default = false;
- description = ''
- Whether to enable monitoring.
- '';
+ # Monitoring options are declared as an attribute set under
+ # myServices.monitoring; `enable` is the only option declared here.
+ myServices.monitoring = {
+ enable = lib.mkOption {
+ type = lib.types.bool;
+ default = false;
+ description = ''
+ Whether to enable monitoring.
+ '';
+ };
};
};
users = [ "naemon" ];
runAs = "root";
}
+ # sudo rule: the naemon user may run, as postgres and without a password,
+ # the replication check and check_last_file_date on paths under /backup2/.
+ # NOTE(review): the `*` wildcards presumably let naemon choose the remaining
+ # arguments freely, so both plugins should treat their arguments as
+ # untrusted input; confirm against sudoers wildcard semantics.
+ {
+ commands = [
+ { command = "${myplugins}/check_postgres_replication *"; options = [ "NOPASSWD" ]; }
+ { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
+ ];
+ users = [ "naemon" ];
+ runAs = "postgres";
+ }
+ # sudo rule: the naemon user may run check_last_file_date on paths under
+ # /backup2/ as the backup user, without a password.
+ {
+ commands = [
+ { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; }
+ ];
+ users = [ "naemon" ];
+ runAs = "backup";
+ }
];
environment.etc."mdadm.conf" = {
enable = true;
$USER200$=${myconfig.env.monitoring.status_url}
$USER201$=${myconfig.env.monitoring.status_token}
'';
- objectDefs = builtins.readFile ./conf/local_services.cfg
- + builtins.readFile ./conf/timeperiods.cfg
- + builtins.readFile ./conf/services.cfg
- + builtins.readFile ./conf/contacts.cfg
- + builtins.readFile ./conf/hosts.cfg
- + ''
- define command {
- command_line ${myplugins}/send_nrdp.sh -u "$USER200$" -t "$USER201$" -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$"
- command_name notify-master
- }
- define service {
- service_description No mdadm array is degraded
- use local-service
- check_command check_command_output!${pkgs.mdadm}/bin/mdadm --monitor --scan -1!^$!-s 0 -r root
- }
-
- define service {
- service_description mailq is empty
- use local-service
- check_command check_mailq
- }
-
- define command {
- command_name check_mailq
- command_line $USER1$/check_mailq -s -w 1 -c 2
- }
-
- define service {
- name local-service
- use generic-service
- host_name eldiron.immae.eu
- check_interval 5
- max_check_attempts 4
- register 0
- retry_interval 1
- }
- define host {
- host_name eldiron.immae.eu
- alias eldiron.immae.eu
- address eldiron.immae.eu
- use linux-server
- }
- '';
+ objectDefs = defaultObjects;
};
};
}
--- /dev/null
+#!/bin/bash
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+user=$1
+host=$2
+port=$3
+
+lag=$(psql -h $host -p $port -A -t -c "SELECT COALESCE(EXTRACT(EPOCH FROM replay_lag),0) FROM pg_stat_replication WHERE usename='$user'" 2>/dev/null)
+exit_code=$?
+
+if [[ $exit_code -ne 0 ]]; then
+ echo "UNKNOWN - Impossible to run psql command"
+ exit $STATE_UNKNOWN
+elif [[ -z "$lag" ]]; then
+ echo "UNKNOWN - No replication found for $user"
+ exit $STATE_UNKNOWN
+else
+ output="Replication lag for $user is ${lag}s"
+ LC_ALL=C lag=$(printf "%.*f" 0 $lag)
+
+ if [[ $lag -lt 5 ]]; then
+ echo "OK - $output"
+ exit $STATE_OK
+ elif [[ $lag -lt 10 ]]; then
+ echo "WARNING - $output"
+ exit $STATE_WARNING
+ else
+ echo "CRITICAL - $output"
+ exit $STATE_CRITICAL
+ fi
+fi