From d8f933bd00a5cc416da00cd26c9d13f7a1c02486 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Isma=C3=ABl=20Bouya?=
Date: Sun, 1 Jul 2018 15:35:43 +0200
Subject: Add monitoring

---
 modules/profile/files/monitoring/check_command | 130 +++++++++++++++++++++++++
 modules/profile/files/monitoring/check_md_raid |  32 +++++++
 2 files changed, 162 insertions(+)
 create mode 100644 modules/profile/files/monitoring/check_command
 create mode 100644 modules/profile/files/monitoring/check_md_raid

(limited to 'modules/profile/files')

diff --git a/modules/profile/files/monitoring/check_command b/modules/profile/files/monitoring/check_command
new file mode 100644
index 0000000..2c7eded
--- /dev/null
+++ b/modules/profile/files/monitoring/check_command
@@ -0,0 +1,130 @@
+#!/usr/bin/perl
+
+# Monitoring check (exit codes 0-3 = OK/WARNING/CRITICAL/UNKNOWN): run a
+# command, optionally through sudo, and verify its exit status (-s), its
+# output against a regular expression (-o), or its output against the output
+# of a second command (-C). With none of the three, exit status 0 is expected.
+
+use strict;
+use warnings;
+use Getopt::Std;
+$| = 1;
+
+my %opts;
+getopts('hr:C:c:s:o:', \%opts);
+
+my $STATE_OK = 0;
+my $STATE_WARNING = 1;
+my $STATE_CRITICAL = 2;
+my $STATE_UNKNOWN = 3;
+
+if ($opts{'h'} || !%opts) {
+  print_help();
+  exit($STATE_OK);
+}
+
+# Options that were not given become '' so the string tests below never
+# compare against undef.
+my $command = defined $opts{'c'} ? $opts{'c'} : '';
+if ($command eq '') {
+  print "You must provide a command to check.\n";
+  exit($STATE_UNKNOWN);
+}
+
+my $expected_output = defined $opts{'o'} ? $opts{'o'} : '';
+my $expected_status = defined $opts{'s'} ? $opts{'s'} : '';
+my $other_command = defined $opts{'C'} ? $opts{'C'} : '';
+
+if ($other_command eq '' and $expected_status eq '' and $expected_output eq '') {
+  $expected_status = 0;
+}
+
+# stderr is merged into stdout so that error messages end up in the result.
+my $cmd = $command . ' 2>&1';
+my $other_cmd;
+if ($other_command ne '') {
+  $other_cmd = $other_command . ' 2>&1';
+}
+
+my $run_as;
+if ($opts{'r'}) {
+  $run_as = $opts{'r'};
+  $cmd = "sudo -u $run_as -n $cmd";
+
+  if ($other_command ne '') {
+    $other_cmd = "sudo -u $run_as -n $other_cmd";
+  }
+
+}
+
+my $cmd_result = `$cmd`;
+# Decode the status right away: the backticks below overwrite $?. $? is a raw
+# wait status (exit code in the high byte, signal in the low 7 bits).
+my $cmd_status = $? == -1   ? -1
+               : ($? & 127) ? 128 + ($? & 127)
+               :              $? >> 8;
+$cmd_result = '' unless defined $cmd_result;
+
+my $other_cmd_result;
+if ($other_command ne '') {
+  $other_cmd_result = `$other_cmd`;
+  $other_cmd_result = '' unless defined $other_cmd_result;
+  chomp($other_cmd_result);
+}
+
+chomp($cmd_result);
+if ($cmd_result =~ /sudo/i) {
+  print "$command UNKNOWN - No sudo right to run the command\n";
+  exit($STATE_UNKNOWN);
+} elsif ($expected_status ne '') {
+  if ($cmd_status != $expected_status) {
+    print "$command CRITICAL - Response status $cmd_status\n";
+    exit($STATE_CRITICAL);
+  } else {
+    print "$command OK - Response status $cmd_status\n";
+    exit($STATE_OK);
+  }
+} elsif ($other_command ne '') {
+  if ($cmd_result ne $other_cmd_result) {
+    print "$command CRITICAL - Expected output not matching other command output\n";
+    exit($STATE_CRITICAL);
+  } else {
+    print "$command OK - Expected output matching other command output\n";
+    exit($STATE_OK);
+  }
+} else {
+  if ($cmd_result !~ /$expected_output/) {
+    print "$command CRITICAL - Expected output not matching\n";
+    exit($STATE_CRITICAL);
+  } else {
+    print "$command OK - Expected output matching\n";
+    exit($STATE_OK);
+  }
+}
+
+sub print_help {
+  print << "EOF";
+Check whether the given command responds as expected. Without -o, -C or -s, an exit status of 0 is expected.
+
+Options:
+-h
+    Print detailed help screen
+
+-c
+    command to run (required)
+
+-C
+    other command to compare output
+
+-r user
+    Run as user via sudo.
+
+-s
+    status code to check
+
+-o
+    output to check
+
+EOF
+}
+
diff --git a/modules/profile/files/monitoring/check_md_raid b/modules/profile/files/monitoring/check_md_raid
new file mode 100644
index 0000000..9c79a7a
--- /dev/null
+++ b/modules/profile/files/monitoring/check_md_raid
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Created by Sebastian Grewe, Jammicron Technology
+#
+
+# Get count of raid arrays
+RAID_DEVICES=`grep ^md -c /proc/mdstat`
+
+# Get count of degraded arrays
+RAID_STATUS=`grep "\[.*_.*\]" /proc/mdstat -c`
+
+# Is an array currently recovering, get percentage of recovery
+RAID_RECOVER=`grep recovery /proc/mdstat | awk '{print $4}'`
+
+# Check raid status
+# RAID recovers --> Warning
+if [[ $RAID_RECOVER ]]; then
+  STATUS="WARNING - Checked $RAID_DEVICES arrays, recovering : $RAID_RECOVER"
+  EXIT=1
+# RAID ok
+elif [[ $RAID_STATUS == "0" ]]; then
+  STATUS="OK - Checked $RAID_DEVICES arrays."
+  EXIT=0
+# All else critical, better safe than sorry
+else
+  STATUS="CRITICAL - Checked $RAID_DEVICES arrays, $RAID_STATUS have FAILED"
+  EXIT=2
+fi
+
+# Status and quit
+echo $STATUS
+exit $EXIT
-- 
cgit v1.2.3


From b5305b5cad5cbb0a2c072b29f2d4dc05126c39d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Isma=C3=ABl=20Bouya?=
Date: Sun, 8 Jul 2018 21:51:30 +0200
Subject: Add postgresql monitoring

---
 .../profile/files/monitoring/check_last_file_date  | 31 +++++++++++++++++++
 .../files/monitoring/check_postgres_replication    | 35 ++++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 modules/profile/files/monitoring/check_last_file_date
 create mode 100644 modules/profile/files/monitoring/check_postgres_replication

(limited to 'modules/profile/files')

diff --git a/modules/profile/files/monitoring/check_last_file_date b/modules/profile/files/monitoring/check_last_file_date
new file mode 100644
index 0000000..8eabb57
--- /dev/null
+++ b/modules/profile/files/monitoring/check_last_file_date
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+base_path=$1   # directory whose newest direct entry is examined
+hours=$2       # maximum accepted age of that entry, in hours
+as_user=$3     # optional: run find as this user through sudo
+
+if [ -z "$as_user" ]; then
+  last_date=$(find "$base_path" -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | LC_ALL=C sort -n | tail -n 1)
+else
+  last_date=$(sudo -n -u "$as_user" find "$base_path" -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | LC_ALL=C sort -n | tail -n 1)
+fi
+
+if [ -z "$last_date" ]; then
+  echo "UNKNOWN: Could not read folder"
+  exit $STATE_UNKNOWN
+else
+  LC_ALL=C last_date=$(printf "%.*f" 0 "$last_date")   # round to whole seconds
+  min_date=$(date -d "$hours hours ago" "+%s")
+  if [ "$min_date" -lt "$last_date" ]; then
+    echo "OK: Last backup $(date -d "@$last_date")"
+    exit $STATE_OK
+  else
+    echo "CRITICAL: Last backup $(date -d "@$last_date")"
+    exit $STATE_CRITICAL
+  fi
+fi
diff --git a/modules/profile/files/monitoring/check_postgres_replication b/modules/profile/files/monitoring/check_postgres_replication
new file mode 100644
index 0000000..163c68a
--- /dev/null
+++ b/modules/profile/files/monitoring/check_postgres_replication
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+user=$1   # usename to look up in pg_stat_replication
+host=$2
+port=$3
+# MAX() yields a single row even when several rows match; it is NULL (empty output) when none does.
+lag=$(psql -h "$host" -p "$port" -A -t -c "SELECT MAX(COALESCE(EXTRACT(EPOCH FROM replay_lag),0)) FROM pg_stat_replication WHERE usename='$user'" 2>/dev/null)
+exit_code=$?
+
+if [[ $exit_code -ne 0 ]]; then
+  echo "UNKNOWN:Impossible to run psql command"
+  exit $STATE_UNKNOWN
+elif [[ -z "$lag" ]]; then
+  echo "UNKNOWN:No replication found for $user"
+  exit $STATE_UNKNOWN
+else
+  output="Replication lag for $user is ${lag}s"
+  LC_ALL=C lag=$(printf "%.*f" 0 $lag)
+
+  if [[ $lag -lt 5 ]]; then
+    echo "OK:$output"
+    exit $STATE_OK
+  elif [[ $lag -lt 10 ]]; then
+    echo "WARNING:$output"
+    exit $STATE_WARNING
+  else
+    echo "CRITICAL:$output"
+    exit $STATE_CRITICAL
+  fi
+fi
-- 
cgit v1.2.3


From a0df248a2be61557b8a67c3d6e4df24dc3e7843e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Isma=C3=ABl=20Bouya?=
Date: Tue, 10 Jul 2018 12:36:52 +0200
Subject: Add monitoring for etherpad

---
 modules/profile/files/monitoring/check_postgres_replication | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'modules/profile/files')

diff --git a/modules/profile/files/monitoring/check_postgres_replication b/modules/profile/files/monitoring/check_postgres_replication
index 163c68a..a550077 100644
--- a/modules/profile/files/monitoring/check_postgres_replication
+++ b/modules/profile/files/monitoring/check_postgres_replication
@@ -13,23 +13,23 @@ lag=$(psql -h "$host" -p "$port" -A -t -c "SELECT MAX(COALESCE(EXTRACT(EPOCH FRO
 exit_code=$?
 
 if [[ $exit_code -ne 0 ]]; then
-  echo "UNKNOWN:Impossible to run psql command"
+  echo "UNKNOWN - Impossible to run psql command"
   exit $STATE_UNKNOWN
 elif [[ -z "$lag" ]]; then
-  echo "UNKNOWN:No replication found for $user"
+  echo "UNKNOWN - No replication found for $user"
   exit $STATE_UNKNOWN
 else
   output="Replication lag for $user is ${lag}s"
   LC_ALL=C lag=$(printf "%.*f" 0 $lag)
 
   if [[ $lag -lt 5 ]]; then
-    echo "OK:$output"
+    echo "OK - $output"
     exit $STATE_OK
   elif [[ $lag -lt 10 ]]; then
-    echo "WARNING:$output"
+    echo "WARNING - $output"
     exit $STATE_WARNING
   else
-    echo "CRITICAL:$output"
+    echo "CRITICAL - $output"
     exit $STATE_CRITICAL
   fi
 fi
-- 
cgit v1.2.3