diff options
author | Ismaël Bouya <ismael.bouya@normalesup.org> | 2023-10-04 01:35:06 +0200 |
---|---|---|
committer | Ismaël Bouya <ismael.bouya@normalesup.org> | 2023-10-04 02:11:48 +0200 |
commit | 1a64deeb894dc95e2645a75771732c6cc53a79ad (patch) | |
tree | 1b9df4838f894577a09b9b260151756272efeb53 /flakes/private/monitoring/plugins | |
parent | fa25ffd4583cc362075cd5e1b4130f33306103f0 (diff) | |
download | Nix-1a64deeb894dc95e2645a75771732c6cc53a79ad.tar.gz Nix-1a64deeb894dc95e2645a75771732c6cc53a79ad.tar.zst Nix-1a64deeb894dc95e2645a75771732c6cc53a79ad.zip |
Squash changes containing private information
There were a lot of changes since the previous commit, but a lot of them
contained personal information about users. All those changes got
squashed into a single commit (history is kept in a different place) and
private information was moved into a separate private repository.
Diffstat (limited to 'flakes/private/monitoring/plugins')
21 files changed, 1421 insertions, 0 deletions
diff --git a/flakes/private/monitoring/plugins/check_backup_age b/flakes/private/monitoring/plugins/check_backup_age new file mode 100755 index 0000000..d873bdc --- /dev/null +++ b/flakes/private/monitoring/plugins/check_backup_age | |||
@@ -0,0 +1,66 @@ | |||
#!/usr/bin/env bash
#
# Nagios-style check of a duplicity backup collection.
#
# Usage:       check_backup_age FOLDER
# Environment: SECRETS_PATH - path of a shell file sourced below
#              (presumably it defines BASE_URL, which is used further down
#              and not set anywhere else in this script - TODO confirm).

set -euo pipefail

# Without this guard `set -u` would abort with bash's generic status 1, which
# a monitoring server reads as WARNING; report UNKNOWN (3) explicitly instead.
if [[ -z "${SECRETS_PATH:-}" || -z "${1:-}" ]]; then
  echo "duply-backup UNKNOWN - usage: SECRETS_PATH=<file> $0 <folder>"
  exit 3
fi

# shellcheck source=/dev/null
source "$SECRETS_PATH"

# Run everything below with a throw-away HOME that is removed on exit.
# Declaration and export are split so that a mktemp failure is not masked.
if ! HOME=$(mktemp -d); then
  echo "duply-backup $1 UNKNOWN - impossible to create temp dir"
  exit 3
fi
export HOME

# Remember the directory in its own variable so the cleanup does not depend on
# HOME still holding the same value at exit time.
tmp_home=$HOME
trap 'rm -rf -- "${tmp_home:?}"' EXIT
folder=$1
#######################################
# Rewrite a compact timestamp (YYYYMMDDTHHMMSS, optionally followed by more
# text) as YYYY-MM-DDTHH:MM:SS. Input that does not match that layout is
# printed unchanged.
# Arguments: $1 - timestamp string
# Outputs:   the reformatted timestamp on stdout
#######################################
parse_date() {
  # `local` keeps the helper from leaking a global `d` into the caller.
  local stamp=$1
  printf '%s\n' "$stamp" \
    | sed -e "s/^\(....\)\(..\)\(..\)T\(..\)\(..\)\(..\)/\1-\2-\3T\4:\5:\6/"
}
15 | |||
# Ask duplicity for the collection status. Its log stream is sent to fd 2
# (--log-fd 2) and captured; regular stdout is discarded.
# The explicit test matters under `set -e`: a bare failing assignment would
# end the script with duplicity's own status and no plugin output at all.
if ! output=$(duplicity collection-status --log-fd 2 "$BASE_URL$folder" 2>&1 > /dev/null); then
  echo "duply-backup $folder UNKNOWN - duplicity collection-status failed"
  exit 3
fi

# Drop the lines starting with a dot. `|| true`: grep exits 1 when it prints
# nothing, which together with pipefail/errexit would abort the script.
output=$(grep -v "^\." <<< "$output" || true)

# Newest "full" entry, and newest entry of any kind ("full" or "inc").
last_full=$(parse_date "$(grep "^ full " <<< "$output" | cut -d' ' -f3 | sort | tail -n1)")
last_bkp=$(parse_date "$(grep -E "^ (full|inc) " <<< "$output" | cut -d' ' -f3 | sort | tail -n1)")
# `|| true` again: a missing counter must reach the UNKNOWN branch below
# instead of killing the script through pipefail.
orphaned_sets=$(grep "^orphaned-sets-num" <<< "$output" | cut -d' ' -f2 || true)
incomplete_sets=$(grep "^incomplete-sets-num" <<< "$output" | cut -d' ' -f2 || true)

if [[ -z "$last_full" || -z "$last_bkp" || -z "$orphaned_sets" || -z "$incomplete_sets" ]]; then
  echo "duply-backup $folder UNKNOWN - impossible to parse result"
  exit 3
fi

# Convert both timestamps to epoch seconds up front so that an unparseable
# date yields UNKNOWN rather than an arithmetic syntax error.
now=$(date "+%s")
if ! last_full_ts=$(date -d "$last_full" "+%s" 2> /dev/null) \
  || ! last_bkp_ts=$(date -d "$last_bkp" "+%s" 2> /dev/null); then
  echo "duply-backup $folder UNKNOWN - impossible to parse result"
  exit 3
fi

last_full_age=$(( (now - last_full_ts) / (60*60*24) ))  # whole days
last_bkp_age=$(( (now - last_bkp_ts) / (60*60) ))       # whole hours

PERFS="orphan=$orphaned_sets;1;;0; incomplete=$incomplete_sets;1;;0; age=${last_bkp_age}h;30;48;0; full_age=${last_full_age}d;35;45;0;"

WARNINGS=""
ERRORS=""
if [[ "$incomplete_sets" -gt 0 ]]; then
  WARNINGS="$WARNINGS - Incomplete sets is $incomplete_sets"
fi

if [[ "$orphaned_sets" -gt 0 ]]; then
  WARNINGS="$WARNINGS - Orphaned sets is $orphaned_sets"
fi

# Full backup: warning above 35 days, critical above 45 days.
if [[ "$last_full_age" -gt 45 ]]; then
  ERRORS="$ERRORS - Last full backup is too old $last_full"
elif [[ "$last_full_age" -gt 35 ]]; then
  WARNINGS="$WARNINGS - Last full backup is getting old $last_full"
fi

# Any backup: warning above 30 hours, critical above 48 hours.
if [[ "$last_bkp_age" -gt 48 ]]; then
  ERRORS="$ERRORS - Last backup is too old $last_bkp"
elif [[ "$last_bkp_age" -gt 30 ]]; then
  WARNINGS="$WARNINGS - Last backup is getting old $last_bkp"
fi

if [[ -n "$ERRORS" ]]; then
  echo "duply-backup $folder CRITICAL$ERRORS$WARNINGS | $PERFS"
  exit 2
elif [[ -n "$WARNINGS" ]]; then
  echo "duply-backup $folder WARNING$WARNINGS | $PERFS"
  exit 1
else
  echo "duply-backup $folder OK | $PERFS"
fi
diff --git a/flakes/private/monitoring/plugins/check_bandwidth b/flakes/private/monitoring/plugins/check_bandwidth new file mode 100755 index 0000000..21d01f5 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_bandwidth | |||
@@ -0,0 +1,122 @@ | |||
#!/bin/bash

# ============================== SUMMARY =====================================
# Author  : Ken Roulamellah
# Date    : 19/07/2018
# Version : 1.0
# Licence : GPL
# ===================== INFORMATION ABOUT THIS PLUGIN ========================
#
# This plugin checks the average RX and TX bandwidth utilisation of one
# network interface. Byte counters are sampled once per second and reported
# in units of 1000 bytes per second.
#
# ========================== START OF PROGRAM CODE ===========================

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

# Default interface: the device named on the first "default" route.
interface=$(ip route | grep default | awk '{print $5}' | head -n1)

# Print the command-line synopsis and the current defaults on stdout.
print_usage() {
  echo "Usage :"
  echo "$0 [ -i=INTERFACE] [ -ct=COUNT ] -w WARNING -c CRITICAL"
  echo "This script calculate the average bandwith usage."
  echo "Default values | interface: ${interface}, counter: 10"
}

counter=10
warning=-1
critical=-1

sum_rx=0
sum_tx=0

if [[ $# -lt 4 ]]; then
  echo "Error: Arguments are missing"
  print_usage
  exit "$STATE_UNKNOWN"
fi

while [[ $# -gt 0 ]]; do
  case "$1" in
    -i=*)
      interface=${1#*=}
      shift
      ;;
    -ct=*)
      counter=${1#*=}
      shift
      ;;
    -w | -c)
      # A trailing -w/-c without value used to loop forever: `shift 2` fails
      # (and shifts nothing) when only one argument is left.
      if [[ $# -lt 2 ]]; then
        echo "Error: Option '$1' requires a value"
        print_usage
        exit "$STATE_UNKNOWN"
      fi
      if [[ "$1" == "-w" ]]; then
        warning=$2
      else
        critical=$2
      fi
      shift 2
      ;;
    *)
      # The offending argument is data, never part of the printf format.
      printf "\nError: Invalid option '%s'\n" "$1"
      print_usage
      exit "$STATE_UNKNOWN"
      ;;
  esac
done

# Thresholds are compared with bc below, so non-negative decimals are allowed.
number_re='^[0-9]+([.][0-9]+)?$'
if [[ ! "$warning" =~ $number_re || ! "$critical" =~ $number_re ]]; then
  echo "Error: You need to specify a warning and critical treshold"
  print_usage
  exit "$STATE_UNKNOWN"
fi

# The sample count is both the loop bound and the divisor of the averages.
if [[ ! "$counter" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: counter must be a positive integer"
  print_usage
  exit "$STATE_UNKNOWN"
fi

# Previously `exec echo ...` ended the plugin with status 0 (OK) here.
if ! grep -q "up" "/sys/class/net/$interface/operstate" 2> /dev/null; then
  echo "$interface: no such device or down"
  exit "$STATE_UNKNOWN"
fi

read -r rx < "/sys/class/net/$interface/statistics/rx_bytes"
read -r tx < "/sys/class/net/$interface/statistics/tx_bytes"

for (( i = counter; i > 0; i-- )); do
  sleep 1
  read -r newrx < "/sys/class/net/$interface/statistics/rx_bytes"
  read -r newtx < "/sys/class/net/$interface/statistics/tx_bytes"

  # Bytes transferred during the last second, scaled down by 1000.
  rx_cal=$(bc <<< "scale=2; ($newrx-$rx) / 1000")
  tx_cal=$(bc <<< "scale=2; ($newtx-$tx) / 1000")

  sum_rx=$(bc <<< "scale=2;$sum_rx+$rx_cal")
  sum_tx=$(bc <<< "scale=2;$sum_tx+$tx_cal")

  rx=$newrx
  tx=$newtx
done

avg_rx=$(bc <<< "scale=2;$sum_rx/$counter")
avg_tx=$(bc <<< "scale=2;$sum_tx/$counter")

details="AVG_RX: $avg_rx kb/s, AVG_TX: $avg_tx kb/s | RX=${avg_rx}kbps;0;0;0; TX=${avg_tx}kbps;0;0;0;"

if [ "$(bc <<< "$avg_rx > $critical || $avg_tx > $critical")" -eq 1 ]; then
  echo "$interface CRITICAL - $details"
  exit "$STATE_CRITICAL"
elif [ "$(bc <<< "$avg_rx > $warning || $avg_tx > $warning")" -eq 1 ]; then
  echo "$interface WARNING - $details"
  exit "$STATE_WARNING"
else
  echo "$interface - OK $details"
  exit "$STATE_OK"
fi
diff --git a/flakes/private/monitoring/plugins/check_command b/flakes/private/monitoring/plugins/check_command new file mode 100755 index 0000000..2b546c1 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_command | |||
@@ -0,0 +1,113 @@ | |||
#!/usr/bin/env perl

# Nagios-style check that runs a command (optionally through sudo) and
# validates one of: its exit status (-s), its output against a pattern (-o),
# or its output against the output of a second command (-C).

use strict;
use Getopt::Std;
$| = 1;

my %opts;
getopts('hr:C:c:s:o:', \%opts);

my $STATE_OK = 0;
my $STATE_WARNING = 1;
my $STATE_CRITICAL = 2;
my $STATE_UNKNOWN = 3;

if ($opts{'h'} || scalar(%opts) == 0) {
    &print_help();
    exit($STATE_OK);
}

my $command = $opts{'c'};
if ($command eq '') {
    print "You must provide a command to check.\n";
    exit($STATE_UNKNOWN);
}

my $expected_output = $opts{'o'};
my $expected_status = $opts{'s'};
my $other_command = $opts{'C'};

# With no criterion given at all, default to "exit status must be 0".
if ($other_command eq '' and $expected_status eq '' and $expected_output eq '') {
    $expected_status = 0;
}

# stderr is merged into the captured output of both commands.
my $cmd = $command . ' 2>&1';
my $other_cmd;
if ($other_command ne '') {
    $other_cmd = $other_command . ' 2>&1';
}

my $run_as;
if ($opts{'r'}) {
    $run_as = $opts{'r'};
    $cmd = "sudo -u $run_as -n $cmd";

    if ($other_command ne '') {
        $other_cmd = "sudo -u $run_as -n $other_cmd";
    }
}

my $cmd_result = `$cmd`;
# $? is the raw wait status, not the exit code: the exit code sits in the
# high byte and the terminating signal (if any) in the low 7 bits. Decode it
# immediately, because the second backtick below overwrites $?.
my $cmd_status;
if ($? == -1) {
    $cmd_status = -1;                    # command could not be started
} elsif ($? & 127) {
    $cmd_status = 128 + ($? & 127);      # killed by a signal
} else {
    $cmd_status = $? >> 8;
}

my $other_cmd_result;
if ($other_command ne '') {
    $other_cmd_result = `$other_cmd`;
    chomp($other_cmd_result);
}

chomp($cmd_result);
if ($cmd_result =~ /sudo/i) {
    # NOTE(review): the text says CRITICAL while the exit code is UNKNOWN;
    # kept as found, confirm which one is intended.
    print "$command CRITICAL - No sudo right to run the command | result=1;;;;\n";
    exit($STATE_UNKNOWN);
} elsif ($expected_status ne '') {
    if ($cmd_status != $expected_status) {
        print "$command CRITICAL - Response status $cmd_status | result=1;;;;\n";
        exit($STATE_CRITICAL);
    } else {
        print "$command OK - Response status $cmd_status | result=0;;;;\n";
        exit($STATE_OK);
    }
} elsif ($other_command ne '') {
    if ($cmd_result ne $other_cmd_result) {
        print "$command CRITICAL - Expected output not matching other command output | result=1;;;;\n";
        exit($STATE_CRITICAL);
    } else {
        print "$command OK - Expected output matching other command output | result=0;;;;\n";
        exit($STATE_OK);
    }
} else {
    # -o is used as a regular expression, not as a literal string.
    if ($cmd_result !~ /$expected_output/) {
        print "$command CRITICAL - Expected output not matching | result=1;;;;\n";
        exit($STATE_CRITICAL);
    } else {
        print "$command OK - Expected output matching | result=0;;;;\n";
        exit($STATE_OK);
    }
}

# Print the option summary on stdout.
sub print_help() {
    print <<"EOF";
Check whether the given command responds as expected. One of -o -C or -s must be selected.

Options:
-h
    Print detailed help screen

-c
    command to run (required)

-C
    other command to compare output

-r user
    Run as user via sudo.

-s
    status code to check

-o
    output to check

EOF
}
diff --git a/flakes/private/monitoring/plugins/check_emails b/flakes/private/monitoring/plugins/check_emails new file mode 100755 index 0000000..534e5a5 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_emails | |||
@@ -0,0 +1,121 @@ | |||
#!/usr/bin/env perl

# Nagios-style check around the external `send_mails` command: it is run
# either locally (-n) or over ssh, and its output is read as one
# "address;date" record per line. Every address listed with -f must appear
# with a date less than 30 minutes old.

use strict;
use Getopt::Std;
use File::Basename;
use Date::Parse;
use POSIX qw(strftime);

$| = 1;

my %opts;
getopts('hH:l:s:p:f:i:n:r:', \%opts);

my $STATE_OK = 0;
my $STATE_WARNING = 1;
my $STATE_CRITICAL = 2;
my $STATE_UNKNOWN = 3;

if ($opts{'h'} || scalar(%opts) == 0) {
    &print_help();
    exit($STATE_OK);
}

my $port = $opts{'p'};
my $host = $opts{'H'};
my $login = $opts{'l'};
if ($login ne '') {
    # Turned into the "user@" prefix of the ssh destination.
    $login = $login . '@';
}

my $identity = $opts{'i'};
my $local_directory = $opts{'n'};
my $return_path = $opts{'r'};

my @emails_to_send = split(/,/, $opts{'s'});
my @emails_to_expect = split(/,/, $opts{'f'});

my $cmd_result;
if ($local_directory ne '') {
    if (@emails_to_expect and ! -d $local_directory) {
        # Trailing newline added: every other status line of this plugin has one.
        print "Emails $host UNKNOWN - Could not find local directory\n";
        exit($STATE_UNKNOWN);
    }
    $cmd_result = `send_mails $local_directory $return_path @emails_to_send 2>&1`;
} else {
    $cmd_result = `ssh -o BatchMode=yes -o UserKnownHostsFile=/dev/null -o CheckHostIP=no -o StrictHostKeyChecking=no -p $port -i $identity $login$host send_mails @emails_to_send 2>&1`;

    if ($cmd_result =~ /Host key verification failed./) {
        print "Emails $host UNKNOWN - Could not connect to host with ssh key\n";
        exit($STATE_UNKNOWN);
    }
}

# Index the "address;date" records by address.
my %found_emails;
foreach my $line (split(/\n/, $cmd_result)) {
    my @split_line = split(/;/, $line, 2);
    $found_emails{$split_line[0]} = $split_line[1];
}

my $output = "";
my $old = 0;                # age in seconds of the oldest expected e-mail found
my $current_date = time;    # epoch seconds, taken once for the whole run
foreach my $email_from (@emails_to_expect) {
    # Each -f entry has the form "address:sender".
    my @email_split = split(/:/, $email_from);
    my $email = $email_split[0];
    my $from = $email_split[1];

    if (exists $found_emails{$email}) {
        my $email_date = str2time($found_emails{$email});
        my $age = $current_date - $email_date;

        if ($age > 60*30) {
            $output = "$output$email ($found_emails{$email} from $from) ";
        }
        $old = $age if $age > $old;
    } else {
        $output = "$output$email (missing) ";
    }
}

if ($output ne '') {
    print "Emails $host CRITICAL - expecting emails: $output | timestamp=${old}s;;;;\n";
    exit($STATE_CRITICAL);
} else {
    print "Emails $host OK | timestamp=${old}s;;;;\n";
    exit($STATE_OK);
}

# Print the option summary on stdout.
sub print_help() {
    print <<"EOF";
Check sent emails

Options:
-h
    Print detailed help screen

-H
    Host to check

-l
    Login

-p
    Port passed to ssh

-i
    Identity file

-n
    Don’t use ssh, pass that directory to script

-r
    Return path for local e-mails

-s
    Comma separated list of emails to send from the host.

-f
    Comma separated list of emails to expect on the host.
EOF
}
diff --git a/flakes/private/monitoring/plugins/check_eriomem b/flakes/private/monitoring/plugins/check_eriomem new file mode 100755 index 0000000..880b88a --- /dev/null +++ b/flakes/private/monitoring/plugins/check_eriomem | |||
@@ -0,0 +1,83 @@ | |||
1 | #!/usr/bin/env python | ||
2 | import os | ||
3 | import sys | ||
4 | import getopt | ||
5 | import signal | ||
6 | from subprocess import Popen, PIPE | ||
7 | |||
8 | STATE_OK = 0 | ||
9 | STATE_WARNING = 1 | ||
10 | STATE_CRITICAL = 2 | ||
11 | STATE_UNKNOWN = 3 | ||
12 | |||
13 | keys = sys.argv[1].split(",") | ||
14 | |||
def to_args(k):
    """Build the ``s3cmd ... du`` argument list for one credential pair.

    ``k`` is an ``access:secret`` string; it is split on the first colon
    only, so any further colon stays in the secret.
    """
    access_key, secret_key = k.split(":", 1)
    credentials = [
        '--access_key={}'.format(access_key),
        '--secret_key={}'.format(secret_key),
    ]
    leading = ["s3cmd", '-c=/dev/null', '--no-check-certificate']
    trailing = [
        '--host=e.eriomem.net',
        '--host-bucket=%(bucket)s.e.eriomem.net',
        'du',
    ]
    return leading + credentials + trailing
27 | |||
28 | max_size = 1024*1024*1024*1024 | ||
29 | warning_percent = 99.75 | ||
30 | critical_percent = 99.95 | ||
31 | |||
def output(code, msg):
    """Print ``msg`` on stdout, then end the process with exit status ``code``."""
    print(msg)
    raise SystemExit(code)
35 | |||
def main():
    """Run one ``s3cmd du`` per configured key and report the summed usage.

    The processes are started together and given 60 seconds in total.
    Usage is compared, as a percentage of ``max_size``, against
    ``warning_percent`` and ``critical_percent``. Every path ends in
    ``output()``, which prints the status line and exits.
    """
    class CommandTimeout(Exception):
        """Raised by the SIGALRM handler once the time budget is used up."""

    # A dedicated exception type is needed: on Python 3 IOError is an alias of
    # OSError, so "except IOError" also caught e.g. a missing s3cmd binary and
    # reported it as a timeout.
    def handler(signum, frame):
        raise CommandTimeout

    signal.signal(signal.SIGALRM, handler)
    signal.alarm(60)

    ps = []  # bound before the try block so the timeout branch can always use it
    try:
        for key in keys:
            ps.append(Popen(to_args(key), stdout=PIPE, stderr=PIPE))
        outs = [p.communicate() for p in ps]
        rets = [p.wait() for p in ps]
    except CommandTimeout:
        for p in ps:
            # Only signal children that are still running.
            if p.poll() is None:
                p.terminate()
        output(STATE_UNKNOWN,
               "Eriomem UNKNOWN - Command timeout after 60 seconds!")
    except OSError:
        signal.alarm(0)
        output(STATE_UNKNOWN,
               "Eriomem UNKNOWN - Error in command")

    signal.alarm(0)

    # any() instead of sum() == 0: a negative return code (child killed by a
    # signal) must not cancel out a positive one.
    if any(rets):
        output(STATE_UNKNOWN,
               "Eriomem UNKNOWN - Error in command")

    try:
        # First field of the last non-empty stdout line of each command.
        usages = [int(out[0].decode().split("\n")[-2].split()[0]) for out in outs]
    except (IndexError, ValueError):
        output(STATE_UNKNOWN,
               "Eriomem UNKNOWN - Unexpected s3cmd output")

    usage = sum(usages)
    use_percent = 100 * usage / max_size
    size = sizeof_fmt(usage)
    if use_percent > critical_percent:
        output(STATE_CRITICAL,
               "Eriomem CRITICAL - bucket usage: %s (%s%%);| size=%s;;;;" %
               (size, use_percent, size))
    elif use_percent > warning_percent:
        output(STATE_WARNING,
               "Eriomem WARNING - bucket usage: %s (%s%%);| size=%s;;;;" %
               (size, use_percent, size))
    else:
        output(STATE_OK,
               "Eriomem OK - bucket usage: %s (%d%%);| size=%s;;;;" %
               (size, use_percent, size))
74 | |||
def sizeof_fmt(num):
    """Format a byte count with 1024-based units: '', ko, Mo, Go, ... Yo.

    The value is divided by 1024 until its magnitude drops below 1024 and is
    printed with one decimal; anything beyond 'Zo' is expressed in 'Yo'.
    """
    for unit in ['', 'ko', 'Mo', 'Go', 'To', 'Po', 'Eo', 'Zo']:
        if abs(num) < 1024.0:
            return "%3.1f%s" % (num, unit)
        num /= 1024.0
    # The format used to hold three placeholders for two values, so this
    # branch raised TypeError instead of returning.
    return "%.1f%s" % (num, 'Yo')
81 | |||
82 | if __name__ == '__main__': | ||
83 | main() | ||
diff --git a/flakes/private/monitoring/plugins/check_ftp_database b/flakes/private/monitoring/plugins/check_ftp_database new file mode 100755 index 0000000..f9cf579 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_ftp_database | |||
@@ -0,0 +1,11 @@ | |||
#!/usr/bin/env bash

# Nagios-style check: list the FTP root as the test account and expect
# exactly one line of the listing to mention "it_works".
listing_hits=$(lftp -u test_ftp,test_ftp eldiron.immae.eu <<< "ls" | grep -c it_works)

if [[ "$listing_hits" -ne 1 ]]; then
  echo "ftp connection CRITICAL - no access to ftp | ftp=0;;;;"
  exit 2
fi

echo "ftp connection OK - access to ftp is working | ftp=1;;;;"
exit 0
diff --git a/flakes/private/monitoring/plugins/check_git b/flakes/private/monitoring/plugins/check_git new file mode 100755 index 0000000..e8fbb29 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_git | |||
@@ -0,0 +1,81 @@ | |||
#!/usr/bin/env bash
#
# Nagios-style check: clone the same repository over git://, http://,
# https:// and ssh, and report which transports failed.
#
# Usage: check_git SSH_KEY
#   SSH_KEY - private key file handed to ssh for the gitolite clone

SSH_KEY="$1"

# A private variable is used instead of overwriting TMPDIR, which child
# processes may consult for their own temporary files.
workdir=$(mktemp -d)

if [ ! -d "$workdir" ]; then
  echo "gitolite UNKNOWN - impossible to create temp dir"
  exit 3
fi

trap 'rm -rf -- "${workdir:?}"' EXIT

ERRORS=""
OUTPUT=""
PERFS=""

# Without this check a failing cd would let the clones and `rm -rf Monitor`
# run in whatever directory the plugin was started from.
if ! cd "$workdir"; then
  echo "gitolite UNKNOWN - impossible to enter temp dir"
  exit 3
fi

# Only ssh clones use this variable. The key path is shell-quoted because the
# value is a command line, so a path with spaces would otherwise be split.
printf -v quoted_key '%q' "$SSH_KEY"
export GIT_SSH_COMMAND="ssh -i $quoted_key -o BatchMode=yes -o UserKnownHostsFile=/dev/null -o CheckHostIP=no -o StrictHostKeyChecking=no"

#######################################
# Clone one URL and record the outcome.
# Globals:   OUTPUT, ERRORS, PERFS (appended to)
# Arguments: $1 - perfdata label
#            $2 - text added to ERRORS when the clone fails
#            $3 - URL to clone
#######################################
try_clone() {
  local perf_label=$1 error_label=$2 url=$3
  local out status=0

  out=$(git clone -q "$url" 2>&1) || status=$?
  if [ -n "$out" ]; then
    OUTPUT="$OUTPUT
$out"
  fi
  if [ "$status" != 0 ]; then
    ERRORS="$ERRORS $error_label"
    PERFS="$PERFS $perf_label=0;;;;"
  else
    PERFS="$PERFS $perf_label=1;;;;"
  fi
  # Every clone lands in ./Monitor; clear it for the next transport.
  rm -rf Monitor
}

try_clone git "git://" git://git.immae.eu/perso/Immae/Projets/Ruby/Monitor.git
try_clone http "http://" http://git.immae.eu/perso/Immae/Projets/Ruby/Monitor.git
try_clone https "https://" https://git.immae.eu/perso/Immae/Projets/Ruby/Monitor.git
try_clone ssh "ssh" gitolite@git.immae.eu:perso/Immae/Projets/Ruby/Monitor

if [ -n "$ERRORS" ]; then
  # The captured git output (previously collected but never shown) follows
  # the status line as additional lines.
  echo "gitolite CRITICAL - impossible to clone via$ERRORS | $PERFS$OUTPUT"
  exit 2
else
  echo "gitolite OK - ssh, git, http and https work | $PERFS"
  exit 0
fi
diff --git a/flakes/private/monitoring/plugins/check_imap_connection b/flakes/private/monitoring/plugins/check_imap_connection new file mode 100755 index 0000000..c1ab0dd --- /dev/null +++ b/flakes/private/monitoring/plugins/check_imap_connection | |||
@@ -0,0 +1,52 @@ | |||
#!/usr/bin/env perl

# Nagios-style check: open an IMAP session through `openssl s_client
# -starttls imap`, send a LOGIN followed by a LOGOUT, and look for
# "a OK Logged in" in the reply.

use strict;
use Getopt::Std;
use IPC::Open3;
$| = 1;

my %opts;
# -h is a plain flag. It used to be declared as 'h:', i.e. as an option that
# requires a value, which contradicts the help text.
getopts('hu:p:H:', \%opts);

my $STATE_OK = 0;
my $STATE_WARNING = 1;
my $STATE_CRITICAL = 2;
my $STATE_UNKNOWN = 3;

if ($opts{'h'} || !$opts{'u'} || !$opts{'p'} || !$opts{'H'}) {
    &print_help();
    exit($STATE_UNKNOWN);
}

my $user = $opts{'u'};
my $password = $opts{'p'};
my $host = $opts{'H'};

# openssl is started without a shell and the credentials are written to its
# stdin. They were previously interpolated into a shell command line, where a
# password containing $, " or a backtick was mangled or executed.
my $cmd_result = '';
{
    # If openssl exits before the commands are written, the write must fail
    # quietly instead of killing this script with SIGPIPE.
    local $SIG{PIPE} = 'IGNORE';

    my ($to_openssl, $from_openssl);
    # A false third argument makes open3 deliver the child's stderr on the
    # same handle as its stdout (the former "2>&1").
    my $pid = eval {
        open3($to_openssl, $from_openssl, undef,
              'openssl', 's_client', '-quiet', '-ign_eof',
              '-connect', $host, '-starttls', 'imap');
    };

    if ($pid) {
        print {$to_openssl} "a login $user $password\n", "b logout\n";
        close($to_openssl);

        local $/;    # slurp the whole reply at once
        $cmd_result = <$from_openssl>;
        $cmd_result = '' unless defined $cmd_result;
        close($from_openssl);
        waitpid($pid, 0);
    }
}
my $expected_result = "a OK Logged in";

chomp($cmd_result);
if ($cmd_result !~ /$expected_result/) {
    print "IMAP CRITICAL - Unable to connect via imaps | imap=0;;;;\n";
    exit($STATE_CRITICAL);
} else {
    print "IMAP OK - imaps connected successfully | imap=1;;;;\n";
    exit($STATE_OK);
}

# Print the option summary on stdout.
sub print_help() {
    print <<"EOF";
Check whether imap works via ssl and is able to connect its database.

Options:
-h
    Print detailed help screen
-u
    User to log in as
-p
    Password to log in
-H
    Host to log in to

EOF
}
52 | |||
diff --git a/flakes/private/monitoring/plugins/check_last_file_date b/flakes/private/monitoring/plugins/check_last_file_date new file mode 100755 index 0000000..f51a258 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_last_file_date | |||
@@ -0,0 +1,28 @@ | |||
#!/bin/bash
#
# Nagios-style check: the most recently modified entry directly inside
# BASE_PATH must be younger than HOURS hours.
#
# Usage: check_last_file_date BASE_PATH HOURS

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

base_path=$1
hours=$2

# An empty path would make find scan the current directory, and a
# non-integer HOURS would break the arithmetic below.
if [[ -z "$base_path" || ! "$hours" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN: usage: $0 <base_path> <hours>"
  exit $STATE_UNKNOWN
fi

# Modification time (epoch seconds with a fractional part) of each direct
# child; `sort -n` compares them as numbers rather than as strings.
last_date=$(find "$base_path" -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort -n | tail -n 1)

if [ -z "$last_date" ]; then
  echo "UNKNOWN: Could not read folder"
  exit $STATE_UNKNOWN
else
  # Round to whole seconds; LC_ALL=C so that "." is read as the decimal mark.
  LC_ALL=C last_date=$(printf "%.*f" 0 "$last_date")
  LC_ALL=C age=$(( $(date "+%s") - last_date ))
  max_age=$(( hours * 60 * 60 ))
  min_date=$(date -d "$hours hours ago" "+%s")
  if [ "$min_date" -lt "$last_date" ]; then
    echo "OK: Last file $(date -d "@$last_date") | age=${age}s;;$max_age;;"
    exit $STATE_OK
  else
    echo "CRITICAL: Last file $(date -d "@$last_date") | age=${age}s;;$max_age;;"
    exit $STATE_CRITICAL
  fi
fi
diff --git a/flakes/private/monitoring/plugins/check_mem.sh b/flakes/private/monitoring/plugins/check_mem.sh new file mode 100755 index 0000000..3a29040 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_mem.sh | |||
@@ -0,0 +1,31 @@ | |||
#!/usr/bin/env bash
#
# Nagios-style memory check based on `free -m -w`.
#
# Usage: check_mem.sh -w <warnlevel> -c <critlevel>
#   Both levels are percentages, given without the % sign.

if [[ "$1" == "-w" && "$2" =~ ^[0-9]+$ && "$3" == "-c" && "$4" =~ ^[0-9]+$ ]] \
  && [ "$2" -gt 0 ] && [ "$4" -gt 0 ]; then
  # One pass over the "Mem" line; columns in the order printed by free -w.
  read -r _ memTotal_m memUsed_m memFree_m memShared_m memBuffer_m memCache_m memAvailable_m \
    < <(free -m -w | grep Mem)

  if [[ ! "$memTotal_m" =~ ^[0-9]+$ ]] || [ "$memTotal_m" -eq 0 ]; then
    echo "Memory: UNKNOWN - impossible to parse the output of free"
    exit 3
  fi

  # NOTE(review): buffers and cache are subtracted from "used" here; check
  # that the installed `free` does not already exclude them from that column.
  memUsedPrc=$(( (memUsed_m - memBuffer_m - memCache_m) * 100 / memTotal_m ))

  perfdata="TOTAL=$memTotal_m;;;; USED=$memUsed_m;;;; SHARED=$memShared_m;;;; CACHE=$memCache_m;;;; AVAILABLE=$memAvailable_m;;;; BUFFER=$memBuffer_m;;;;"

  if [ "$memUsedPrc" -ge "$4" ]; then
    echo "Memory: CRITICAL Total: $memTotal_m MB - Used/Buffer/Cache: $memUsed_m MB - $memUsedPrc% used!|$perfdata"
    exit 2
  elif [ "$memUsedPrc" -ge "$2" ]; then
    echo "Memory: WARNING Total: $memTotal_m MB - Used/Buffer/Cache: $memUsed_m MB - $memUsedPrc% used!|$perfdata"
    exit 1
  else
    echo "Memory: OK Total: $memTotal_m MB - Used/Buffer/Cache: $memUsed_m MB - $memUsedPrc% used|$perfdata"
    exit 0
  fi
else # If inputs are not as expected, print help.
  sName=${0##*/}
  echo -e "\n\n\t\t### $sName Version 2.0###\n"
  echo -e "# Usage:\t$sName -w <warnlevel> -c <critlevel>"
  echo -e "\t\t= warnlevel and critlevel is percentage value without %\n"
  echo -e "# EXAMPLE:\t/usr/lib64/nagios/plugins/$sName -w 80 -c 90"
  echo -e "\nCopyright (C) 2012 Lukasz Gogolin (lukasz.gogolin@gmail.com), improved by Nestor 2015\n\n"
  # A bare `exit` returned the status of the last echo (0 = OK); a bad
  # invocation must be reported as UNKNOWN.
  exit 3
fi
diff --git a/flakes/private/monitoring/plugins/check_mysql_replication b/flakes/private/monitoring/plugins/check_mysql_replication new file mode 100755 index 0000000..1ee5de1 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_mysql_replication | |||
@@ -0,0 +1,41 @@ | |||
#!/bin/bash
#
# Nagios-style check of MySQL replication lag, read from the output of
# "show slave status".
#
# Usage: check_mysql_replication SOCKET CONFIG_FILE

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

socket=$1
config_file=$2
# Both paths are quoted so that whitespace in them cannot split the arguments.
info=$(mysql --defaults-file="${config_file}" -S "$socket" -e "show slave status" --vertical)
exit_code=$?

# Print the value of one "Name: value" line of $info with all whitespace
# removed. \b keeps e.g. Slave_SQL_Running from matching a longer field name.
slave_field() {
  local field=$1
  grep "\b${field}\b" <<< "$info" | cut -d':' -f2 | sed -e "s/\s//g"
}

lag=$(slave_field Seconds_Behind_Master)
IO_running=$(slave_field Slave_IO_Running)
SQL_running=$(slave_field Slave_SQL_Running)

if [[ $exit_code -ne 0 ]]; then
  echo "UNKNOWN - Impossible to run mysql command"
  exit $STATE_UNKNOWN
elif [[ -z "$lag" ]]; then
  echo "UNKNOWN - No replication found for mysql"
  exit $STATE_UNKNOWN
elif [[ "$IO_running" != "Yes" || "$SQL_running" != "Yes" ]]; then
  echo "UNKNOWN - Replication is not running"
  exit $STATE_UNKNOWN
else
  output="Replication lag for mysql is ${lag}s"
  # Round to an integer; LC_ALL=C so that "." is read as the decimal mark.
  LC_ALL=C lag=$(printf "%.*f" 0 "$lag")

  if [[ $lag -lt 5 ]]; then
    echo "OK - $output | time=${lag}s;5;10;;"
    exit $STATE_OK
  elif [[ $lag -lt 10 ]]; then
    echo "WARNING - $output | time=${lag}s;5;10;;"
    exit $STATE_WARNING
  else
    echo "CRITICAL - $output | time=${lag}s;5;10;;"
    exit $STATE_CRITICAL
  fi
fi
diff --git a/flakes/private/monitoring/plugins/check_openldap_replication b/flakes/private/monitoring/plugins/check_openldap_replication new file mode 100755 index 0000000..7136ad5 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_openldap_replication | |||
@@ -0,0 +1,54 @@ | |||
1 | #!/bin/bash | ||
2 | |||
3 | STATE_OK=0 | ||
4 | STATE_WARNING=1 | ||
5 | STATE_CRITICAL=2 | ||
6 | STATE_UNKNOWN=3 | ||
7 | |||
8 | distant_host="$1" | ||
9 | replication_dn="$2" | ||
10 | replication_pw="$3" | ||
11 | base="$4" | ||
12 | config="$5" | ||
13 | |||
#######################################
# Extract the timestamp part of a contextCSN attribute from LDIF text.
# From each "contextCSN: <stamp>.<fraction>#..." line the text between the
# colon and the first "." / "#" is kept, with whitespace removed. When
# several contextCSN lines are present the greatest value is printed.
# Arguments: $1 - LDIF text
# Outputs:   the timestamp on stdout (nothing when there is no contextCSN)
#######################################
to_date() {
  # `local` keeps the helper from overwriting a global `i` in the caller.
  local ldif=$1
  printf '%s\n' "$ldif" \
    | grep contextCSN \
    | cut -d":" -f2 \
    | sed -e "s/\s//g" \
    | cut -d"#" -f1 \
    | cut -d"." -f1 \
    | sort \
    | tail -n1
}
21 | |||
#######################################
# Convert a 14-digit YYYYmmddHHMMSS timestamp to epoch seconds.
# The stamp is interpreted as UTC; as both sides of the comparison go through
# this function, their difference does not depend on the local timezone.
# Arguments: $1 - timestamp
# Outputs:   epoch seconds on stdout
# Returns:   non-zero when $1 does not have the expected layout
#######################################
csn_to_epoch() {
  local csn=$1
  [[ "$csn" =~ ^([0-9]{4})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})$ ]] || return 1
  date -u -d "${BASH_REMATCH[1]}-${BASH_REMATCH[2]}-${BASH_REMATCH[3]} ${BASH_REMATCH[4]}:${BASH_REMATCH[5]}:${BASH_REMATCH[6]}" "+%s"
}

# ldap: contextCSN as seen on the distant server
remote_ldap=$(ldapsearch -H "$distant_host" -D "$replication_dn" -y "$replication_pw" -b "$base" -s base -LLL contextCSN)
exit_code_remote=$?
remote_ldap=$(to_date "$remote_ldap")

# slapcat: contextCSN as stored in the local database
local_ldap=$(slapcat -b "$base" -f "$config" -a "(entryDN=$base)")
exit_code_local=$?
local_ldap=$(to_date "$local_ldap")

# Exit codes are checked before any arithmetic: an empty value used to make
# the subtraction fail with a syntax error ahead of this test.
if [[ $exit_code_remote -ne 0 || $exit_code_local -ne 0 ]]; then
  echo "UNKNOWN - Impossible to run ldap command"
  exit $STATE_UNKNOWN
fi

if ! remote_epoch=$(csn_to_epoch "$remote_ldap") \
  || ! local_epoch=$(csn_to_epoch "$local_ldap"); then
  echo "UNKNOWN - No replication found"
  exit $STATE_UNKNOWN
fi

# Lag in real seconds. Subtracting the raw YYYYmmddHHMMSS numbers, as was done
# before, is wrong as soon as the stamps straddle a minute, hour or day.
offset=$(( remote_epoch - local_epoch ))

output="Replication lag for openldap is ${offset}s"

if [[ $offset -lt 5 ]]; then
  echo "OK - $output | time=${offset}s;5;10;;"
  exit $STATE_OK
elif [[ $offset -lt 10 ]]; then
  echo "WARNING - $output | time=${offset}s;5;10;;"
  exit $STATE_WARNING
else
  echo "CRITICAL - $output | time=${offset}s;5;10;;"
  exit $STATE_CRITICAL
fi
diff --git a/flakes/private/monitoring/plugins/check_ovh_sms b/flakes/private/monitoring/plugins/check_ovh_sms new file mode 100755 index 0000000..caf279c --- /dev/null +++ b/flakes/private/monitoring/plugins/check_ovh_sms | |||
@@ -0,0 +1,25 @@ | |||
#!/usr/bin/env python

import sys


def _credits_left():
    """Return the ``creditsLeft`` value of the SMS account given in argv[1].

    ``sys.argv[1]`` is a comma-separated list of exactly five fields:
    endpoint, application key, application secret, consumer key, account.
    """
    import ovh

    endpoint, application_key, application_secret, consumer_key, account = \
        sys.argv[1].split(",")
    client = ovh.Client(
        endpoint=endpoint,
        application_key=application_key,
        application_secret=application_secret,
        consumer_key=consumer_key,
    )
    return client.get('/sms/{}'.format(account))["creditsLeft"]


# Any failure (missing module, malformed argument, API error) is reported as
# UNKNOWN; sys.exit() is not caught by "except Exception".
try:
    result = _credits_left()

    if result < 20:
        print("SMS OVH Critical - Not enough sms left ({})|SMS={};;;;".format(result, result))
        sys.exit(2)

    print("SMS OVH Ok - Enough sms left ({})|SMS={};;;;".format(result, result))
    sys.exit(0)
except Exception:
    print("SMS OVH UNKNOWN - Error during script")
    sys.exit(3)
diff --git a/flakes/private/monitoring/plugins/check_postgres_database_count b/flakes/private/monitoring/plugins/check_postgres_database_count new file mode 100755 index 0000000..43bdd8c --- /dev/null +++ b/flakes/private/monitoring/plugins/check_postgres_database_count | |||
@@ -0,0 +1,32 @@ | |||
#!/bin/bash
#
# Monitoring check: number of databases known to a PostgreSQL server.
#
# Usage: check_postgres_database_count <host> <port> <min>
#
# Prints OK when the number of rows in pg_catalog.pg_database is strictly
# greater than <min>, CRITICAL otherwise, and UNKNOWN when the arguments are
# invalid or psql cannot be run.

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

if [[ $# -lt 3 ]]; then
  echo "UNKNOWN - Usage: $0 <host> <port> <min>"
  exit $STATE_UNKNOWN
fi

host=$1
port=$2
min=$3

if ! [[ $min =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN - Minimum database count must be an integer, got '$min'"
  exit $STATE_UNKNOWN
fi

# stderr is merged into the captured output so that the psql error message
# can be shown when the command fails.
count=$(psql -h "$host" -p "$port" -A -q -c '\t' -c 'select count(datname) from pg_catalog.pg_database' postgres 2>&1)
exit_code=$?

if [[ $exit_code -ne 0 ]]; then
  echo "UNKNOWN - Impossible to run psql command: $count"
  exit $STATE_UNKNOWN
elif [[ -z "$count" ]]; then
  echo "UNKNOWN - No database found"
  exit $STATE_UNKNOWN
elif ! [[ $count =~ ^[0-9]+$ ]]; then
  # Because stderr is captured too, warnings may be mixed with the result;
  # refuse to compare anything that is not a plain integer.
  echo "UNKNOWN - Unexpected psql output: $count"
  exit $STATE_UNKNOWN
else
  output="Database count is $count"

  if [[ $count -gt $min ]]; then
    echo "OK - $output | count=${count};$min;$min;0;"
    exit $STATE_OK
  else
    echo "CRITICAL - $output | count=${count};$min;$min;0;"
    exit $STATE_CRITICAL
  fi
fi
diff --git a/flakes/private/monitoring/plugins/check_postgres_replication b/flakes/private/monitoring/plugins/check_postgres_replication new file mode 100755 index 0000000..ff257a3 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_postgres_replication | |||
@@ -0,0 +1,35 @@ | |||
#!/bin/bash
#
# Monitoring check: replay lag of a PostgreSQL streaming replication client.
#
# Usage: check_postgres_replication <user> <host> <port>
#
# Queries pg_stat_replication for the rows whose usename is <user> and
# reports the largest replay lag, in seconds:
#   OK       lag below 5 seconds
#   WARNING  lag below 10 seconds
#   CRITICAL otherwise
#   UNKNOWN  invalid arguments, psql failure or no matching replication row

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

if [[ $# -lt 3 ]]; then
  echo "UNKNOWN - Usage: $0 <user> <host> <port>"
  exit $STATE_UNKNOWN
fi

user=$1
host=$2
port=$3

# The user name is embedded in a SQL string literal: double any single quote
# so that it cannot terminate the literal.
quote="'"
sql_user=${user//$quote/$quote$quote}

# One user may own several replication connections; ORDER BY ... LIMIT 1
# keeps only the worst lag so that the result is always a single number
# (and still empty when there is no matching row).
lag=$(psql -h "$host" -p "$port" -A -t -c "SELECT COALESCE(EXTRACT(EPOCH FROM replay_lag),0) FROM pg_stat_replication WHERE usename='$sql_user' ORDER BY 1 DESC LIMIT 1" 2>/dev/null)
exit_code=$?

if [[ $exit_code -ne 0 ]]; then
  echo "UNKNOWN - Impossible to run psql command"
  exit $STATE_UNKNOWN
elif [[ -z "$lag" ]]; then
  echo "UNKNOWN - No replication found for $user"
  exit $STATE_UNKNOWN
else
  output="Replication lag for $user is ${lag}s"

  # Round to an integer for the comparisons below. The C locale is forced
  # (inside the subshell only) so that "." is accepted as decimal separator.
  if ! lag=$(LC_ALL=C; printf '%.0f' "$lag" 2>/dev/null); then
    echo "UNKNOWN - Unexpected replication lag value for $user"
    exit $STATE_UNKNOWN
  fi

  if [[ $lag -lt 5 ]]; then
    echo "OK - $output | time=${lag}s;5;10;0;"
    exit $STATE_OK
  elif [[ $lag -lt 10 ]]; then
    echo "WARNING - $output | time=${lag}s;5;10;0;"
    exit $STATE_WARNING
  else
    echo "CRITICAL - $output | time=${lag}s;5;10;0;"
    exit $STATE_CRITICAL
  fi
fi
diff --git a/flakes/private/monitoring/plugins/check_redis_replication b/flakes/private/monitoring/plugins/check_redis_replication new file mode 100755 index 0000000..6dbe4c4 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_redis_replication | |||
@@ -0,0 +1,38 @@ | |||
#!/bin/bash
#
# Monitoring check: replication state of a Redis replica.
#
# Usage: check_redis_replication <socket>
#
# Reads "info replication" through the given unix socket and reports:
#   - the number of seconds since the last exchange with the master
#     (master_last_io_seconds_ago);
#   - the difference between master_repl_offset and slave_repl_offset.
# OK below 5 for both values, WARNING below 10 for both, CRITICAL otherwise.

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

socket=$1

if [[ -z "$socket" ]]; then
  echo "UNKNOWN - Usage: $0 <socket>"
  exit $STATE_UNKNOWN
fi

info=$(redis-cli -s "$socket" info replication)
# The status must be captured right after redis-cli: any later command would
# overwrite $?.
exit_code=$?

# Print the value of one "key:value" line of $info, without any whitespace
# (the lines reported by redis end with a carriage return).
info_field() {
  printf '%s\n' "$info" \
    | awk -F: -v key="$1" '$1 == key { gsub(/[[:space:]]/, "", $2); print $2; exit }'
}

if [[ $exit_code -ne 0 ]]; then
  echo "UNKNOWN - Impossible to run redis command"
  exit $STATE_UNKNOWN
fi

lag=$(info_field master_last_io_seconds_ago)
slave_offset=$(info_field slave_repl_offset)
master_offset=$(info_field master_repl_offset)

if [[ -z "$lag" ]]; then
  echo "UNKNOWN - No replication found"
  exit $STATE_UNKNOWN
elif ! [[ $lag =~ ^-?[0-9]+$ && $slave_offset =~ ^[0-9]+$ && $master_offset =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN - Unable to read replication lag and offsets"
  exit $STATE_UNKNOWN
elif [[ $lag -lt 0 ]]; then
  # NOTE(review): redis appears to report -1 here when the link with the
  # master is down; without this branch a negative lag passes the "lower
  # than 5" test and is reported as OK. Confirm against the INFO docs.
  echo "CRITICAL - Replication link with master is down"
  exit $STATE_CRITICAL
else
  offset=$((master_offset - slave_offset))
  output="Replication lag for redis is ${lag}s and offset is ${offset}"

  if [[ $lag -lt 5 && $offset -lt 5 ]]; then
    echo "OK - $output | time=${lag}s;5;10;0; offset=${offset};5;10;0;"
    exit $STATE_OK
  elif [[ $lag -lt 10 && $offset -lt 10 ]]; then
    echo "WARNING - $output | time=${lag}s;5;10;0; offset=${offset};5;10;0;"
    exit $STATE_WARNING
  else
    echo "CRITICAL - $output | time=${lag}s;5;10;0; offset=${offset};5;10;0;"
    exit $STATE_CRITICAL
  fi
fi
diff --git a/flakes/private/monitoring/plugins/check_zfs_snapshot b/flakes/private/monitoring/plugins/check_zfs_snapshot new file mode 100755 index 0000000..56f8c4f --- /dev/null +++ b/flakes/private/monitoring/plugins/check_zfs_snapshot | |||
@@ -0,0 +1,325 @@ | |||
#! /bin/sh

# Kernel name reported by uname; the date helpers of this script compare it
# with 'Linux' to choose which date(1) options to use.
OS="$(uname)"
4 | |||
5 | # MIT License | ||
6 | # | ||
7 | # Copyright (c) 2016 Josef Friedrich <josef@friedrich.rocks> | ||
8 | # | ||
9 | # Permission is hereby granted, free of charge, to any person obtaining | ||
10 | # a copy of this software and associated documentation files (the | ||
11 | # "Software"), to deal in the Software without restriction, including | ||
12 | # without limitation the rights to use, copy, modify, merge, publish, | ||
13 | # distribute, sublicense, and/or sell copies of the Software, and to | ||
14 | # permit persons to whom the Software is furnished to do so, subject to | ||
15 | # the following conditions: | ||
16 | # | ||
17 | # The above copyright notice and this permission notice shall be | ||
18 | # included in all copies or substantial portions of the Software. | ||
19 | # | ||
20 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
21 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
22 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||
23 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
24 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
25 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
26 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
27 | |||
28 | ######################################################################## | ||
29 | # Date functions | ||
30 | ######################################################################## | ||
31 | |||
32 | # These date functions must be placed at the top of this file because | ||
33 | # they are used in some global variables. | ||
34 | |||
35 | # to_year ### | ||
36 | |||
37 | ## | ||
38 | # Get the four digit year integer from now. | ||
39 | # | ||
40 | # Return: | ||
41 | # The current 4 digit year. | ||
42 | ## | ||
_now_to_year() {
	# %Y is the year including the century, i.e. four digits.
	date '+%Y'
}
46 | |||
47 | ## | ||
48 | # Convert a date in the format YYYY-MM-DD to a four digit year integer. | ||
49 | # | ||
50 | # Parameters: | ||
51 | # a date in the format YYYY-MM-DD | ||
52 | # | ||
53 | # Return: | ||
54 | # four digit year integer | ||
55 | ## | ||
_date_to_year() {
	# Print the four digit year of the date given as $1 (YYYY-MM-DD).
	#
	# The global $OS selects the date(1) syntax: '--date' on Linux,
	# '-j -f <format>' otherwise (FreeBSD, Darwin). The date is passed as
	# a single quoted argument so that it is never split into several
	# operands.
	case "$OS" in
		Linux)
			date --date "$1" +%Y
			;;
		*)
			date -j -f %Y-%m-%d "$1" +%Y
			;;
	esac
}
66 | |||
67 | # to_datetime ### | ||
68 | |||
69 | ## | ||
70 | # Convert a UNIX timestamp to a datetime string. | ||
71 | # | ||
72 | # Parameters: | ||
73 | # UNIX timestamp | ||
74 | # | ||
75 | # Return: | ||
76 | # %Y-%m-%d.%H:%M:%S | ||
77 | ## | ||
_timestamp_to_datetime() {
	# Print the UNIX timestamp $1 formatted as %Y-%m-%d.%H:%M:%S.
	# Everything but Linux (FreeBSD, Darwin) takes the '-j -f' syntax.
	if [ "$OS" != 'Linux' ]; then
		date -j -f %s "$1" +%Y-%m-%d.%H:%M:%S
		return
	fi
	date --date "@$1" +%Y-%m-%d.%H:%M:%S
}
88 | |||
89 | # to_timestamp ### | ||
90 | |||
91 | ## | ||
92 | # Get the current UNIX timestamp. | ||
93 | # | ||
94 | # Return: | ||
95 | # current UNIX timestamp | ||
96 | ## | ||
_now_to_timestamp() {
	# %s is the number of seconds since the epoch.
	date '+%s'
}
100 | |||
# Exit codes understood by the monitoring system.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

# Places where the upstream plugin is published, one URL per line.
PROJECT_PAGES='https://github.com/Josef-Friedrich/check_zfs_snapshot
https://exchange.icinga.com/joseffriedrich/check_zfs_snapshot
https://exchange.nagios.org/directory/Plugins/System-Metrics/File-System/check_zfs_snapshot/details'

VERSION=1.2
FIRST_RELEASE=2016-09-08
SHORT_DESCRIPTION='Monitoring plugin to check how long ago the last snapshot of a ZFS dataset was created.'

# Help text printed by -h/--help and after usage errors. The copyright line
# is built with the date helpers of this script, which therefore have to be
# defined before this assignment runs.
USAGE="check_zfs_snapshot v$VERSION
Copyright (c) $(_date_to_year "$FIRST_RELEASE")-$(_now_to_year) Josef Friedrich <josef@friedrich.rocks>

$SHORT_DESCRIPTION


Usage: check_zfs_snapshot <options>

Options:
-c, --critical=OPT_CRITICAL
Interval in seconds for critical state.
-d, --dataset=OPT_DATASET
The ZFS dataset to check.
-h, --help
Show this help.
-s, --short-description
Show a short description of the command.
-v, --version
Show the version number.
-w, --warning=OPT_WARNING
Interval in seconds for warning state. Must be lower than -c

Performance data:
- last_ago
Time interval in seconds for last snapshot.
- warning
Interval in seconds.
- critical
Interval in seconds.
- snapshot_count
How many snapshot exists in the given dataset and all child
datasets exists.
"
149 | |||
_get_last_snapshot() {
	# Print the creation time (UNIX timestamp) of the most recent snapshot
	# of the dataset $1 and of its descendants, or 0 when there is none.
	#
	# 'zfs get -H' separates its columns (name, property, value, source)
	# with tabs. awk is told to split on tabs only, so that a dataset name
	# containing a space does not shift the value out of the third column.
	zfs get creation -Hpr -t snapshot "$1" | \
		awk -F '\t' 'BEGIN {max = 0} {if ($3>max) max=$3} END {print max}'
}
154 | |||
_getopts() {
	# Parse the command line given as arguments into OPT_CRITICAL,
	# OPT_DATASET and OPT_WARNING. The informational options print their
	# text and exit 0; invalid usage prints a message on stderr and exits
	# with 2 (unknown option), 3 (missing argument) or 4 (unexpected
	# argument).
	#
	# Long options are handled through the '-:' entry of the getopts
	# string: '--name=value' is seen as option '-' with the argument
	# 'name=value'.
	while getopts ':c:d:hsvw:-:' OPT ; do
		case $OPT in
			c) OPT_CRITICAL=$OPTARG ;;
			d) OPT_DATASET="$OPTARG" ;;
			w) OPT_WARNING=$OPTARG ;;

			h) echo "$USAGE"; exit 0 ;;
			s) echo "$SHORT_DESCRIPTION"; exit 0 ;;
			v) echo "$VERSION"; exit 0 ;;

			\?)
				echo "Invalid option “-$OPTARG”!" >&2
				exit 2
				;;
			:)
				echo "Option “-$OPTARG” requires an argument!" >&2
				exit 3
				;;

			-)
				# Everything after the first '=' is the value.
				LONG_OPTARG="${OPTARG#*=}"

				case $OPTARG in
					critical=?*) OPT_CRITICAL=$LONG_OPTARG ;;
					dataset=?*) OPT_DATASET="$LONG_OPTARG" ;;
					warning=?*) OPT_WARNING=$LONG_OPTARG ;;

					help) echo "$USAGE"; exit 0 ;;
					short-description) echo "$SHORT_DESCRIPTION"; exit 0 ;;
					version) echo "$VERSION"; exit 0 ;;

					# Reached only when the '=value' part is missing or empty.
					critical*|dataset*|warning*)
						echo "Option “--$OPTARG” requires an argument!" >&2
						exit 3
						;;

					# Reached only when a flag was given something extra.
					help*|short-description*|version*)
						echo "No argument allowed for the option “--$OPTARG”!" >&2
						exit 4
						;;

					'') # "--" terminates argument processing
						break
						;;

					*)
						echo "Invalid option “--$OPTARG”!" >&2
						exit 2
						;;
				esac
				;;
		esac
	done
}
253 | |||
_snapshot_count() {
	# Print the number of snapshots of the dataset $1 and of all its
	# child datasets.
	#
	# zfs itself restricts the listing to the dataset ('-r <dataset>')
	# instead of filtering the full listing with grep, which matched the
	# name as a regular expression anywhere in the line and therefore
	# also counted snapshots of e.g. 'tank/data2' for 'tank/data'.
	# awk prints the bare line count, without the padding that some wc
	# implementations (FreeBSD) add.
	zfs list -H -o name -t snapshot -r "$1" | awk 'END { print NR }'
}
262 | |||
_performance_data() {
	# Print the performance data part of the plugin output: the age of
	# the last snapshot (global DIFF) with the warning and critical
	# thresholds, followed by the snapshot count of OPT_DATASET.
	printf '| last_ago=%ss;%s;%s;0 count=%s;;;0\n' \
		"$DIFF" \
		"$OPT_WARNING" \
		"$OPT_CRITICAL" \
		"$(_snapshot_count "$OPT_DATASET")"
}
269 | |||
270 | ## This SEPARATOR is required for test purposes. Please don’t remove! ## | ||
271 | |||
# Quoted so that option values containing whitespace stay in one piece.
_getopts "$@"

# Default thresholds: warning after 1 day, critical after 3 days.
: "${OPT_WARNING:=86400}"
: "${OPT_CRITICAL:=259200}"

if [ -z "$OPT_DATASET" ]; then
	echo "Dataset has to be set! Use option -d <dataset>" >&2
	echo "$USAGE" >&2
	exit $STATE_UNKNOWN
fi

if ! zfs list "$OPT_DATASET" > /dev/null 2>&1; then
	echo "'$OPT_DATASET' is no ZFS dataset!" >&2
	echo "$USAGE" >&2
	exit $STATE_UNKNOWN
fi

# The thresholds are used in integer comparisons below; reject anything
# else instead of letting the tests fail with a shell error.
for THRESHOLD in "$OPT_WARNING" "$OPT_CRITICAL"; do
	case $THRESHOLD in
		''|*[!0-9]*)
			echo "Thresholds must be intervals in seconds, got '$THRESHOLD'" >&2
			echo "$USAGE" >&2
			exit $STATE_UNKNOWN
			;;
	esac
done

if [ "$OPT_WARNING" -gt "$OPT_CRITICAL" ]; then
	echo '-w OPT_WARNING must be smaller than -c OPT_CRITICAL'
	echo "$USAGE" >&2
	exit $STATE_UNKNOWN
fi

NOW=$(_now_to_timestamp)

CREATION_DATE=$(_get_last_snapshot "$OPT_DATASET")

# _get_last_snapshot prints 0 when the dataset has no snapshot at all:
# say so instead of reporting a snapshot created in 1970.
if [ "$CREATION_DATE" = 0 ]; then
	echo "CRITICAL: No snapshot found for dataset '$OPT_DATASET'"
	exit $STATE_CRITICAL
fi

DIFF=$((NOW - CREATION_DATE))

RETURN=$STATE_UNKNOWN

if [ "$DIFF" -gt "$OPT_CRITICAL" ]; then
	RETURN=$STATE_CRITICAL
	MESSAGE="CRITICAL:"
elif [ "$DIFF" -gt "$OPT_WARNING" ]; then
	RETURN=$STATE_WARNING
	MESSAGE="WARNING:"
else
	RETURN=$STATE_OK
	MESSAGE="OK:"
fi

DATE="$(_timestamp_to_datetime "$CREATION_DATE")"

echo "$MESSAGE Last snapshot for dataset '$OPT_DATASET' was created on $DATE $(_performance_data)"

exit $RETURN
diff --git a/flakes/private/monitoring/plugins/notify_by_apprise b/flakes/private/monitoring/plugins/notify_by_apprise new file mode 100755 index 0000000..82bc5a3 --- /dev/null +++ b/flakes/private/monitoring/plugins/notify_by_apprise | |||
@@ -0,0 +1,31 @@ | |||
#!/usr/bin/env bash
#
# Notification command: send a host or service notification with apprise.
#
# Arguments:
#   $1  "host" for a host notification, anything else for a service one
#   $2  whitespace-separated apprise URLs; the placeholders {username} and
#       {image_url} are replaced by the values below
#
# Environment (read as-is): NOTIFICATIONTYPE, HOST, HOSTSTATE, HOSTOUTPUT,
# SERVICEDESC, SERVICESTATE, SERVICEOUTPUT.

APPRISE_USERNAME="Naemon"
APPRISE_USERICON="https://assets.immae.eu/monitoring.png"
APPRISE_URLS=$(echo "$2" | sed -e "s/{username}/$APPRISE_USERNAME/g" -e "s@{image_url}@$APPRISE_USERICON@g")

# The service state takes precedence; the host state is only looked at when
# the service state is not one of the known values.
case "$SERVICESTATE" in
  CRITICAL) ICON="❗" ;;
  WARNING) ICON="⚠️" ;;
  OK) ICON="✅" ;;
  UNKNOWN) ICON="❓" ;;
  *)
    case "$HOSTSTATE" in
      UP) ICON="✅" ;;
      DOWN) ICON="❗" ;;
      UNKNOWN | UNREACHABLE) ICON="❓" ;;
      *) ICON="◻" ;;
    esac
    ;;
esac

# $APPRISE_URLS is left unquoted on purpose so that each URL becomes its own
# argument; globbing is disabled so that "?" or "*" inside a URL is never
# expanded against file names.
set -f

if [ "$1" = "host" ]; then
  # shellcheck disable=SC2086
  apprise --title "${ICON} ${NOTIFICATIONTYPE} ${HOST} is ${HOSTSTATE}" --body "$HOSTOUTPUT" $APPRISE_URLS
else
  # shellcheck disable=SC2086
  apprise --title "${ICON} ${NOTIFICATIONTYPE} ${SERVICEDESC} on ${HOST} is ${SERVICESTATE}" --body "$SERVICEOUTPUT" $APPRISE_URLS
fi
diff --git a/flakes/private/monitoring/plugins/notify_by_email b/flakes/private/monitoring/plugins/notify_by_email new file mode 100755 index 0000000..959db26 --- /dev/null +++ b/flakes/private/monitoring/plugins/notify_by_email | |||
@@ -0,0 +1,29 @@ | |||
#!/usr/bin/env bash
#
# Notification command: send a host or service notification by e-mail.
#
# Arguments:
#   $1 = service/host
#   $2 = type (PROBLEM RECOVERY ACKNOWLEDGEMENT FLAPPINGSTART FLAPPINGSTOP
#        FLAPPINGDISABLED DOWNTIMESTART DOWNTIMESTOP DOWNTIMECANCELLED)
#        http://www.naemon.org/documentation/usersguide/macrolist.html#notificationtype
#   $3 = host alias
#   $4 = date (YYYY-MM-DDTHH:MM:SS)
#   $5 = E-mail
#
# Environment (read as-is): ADMINEMAIL (sender), HOSTSTATE, HOSTOUTPUT,
# SERVICEDESC, SERVICESTATE, SERVICEOUTPUT.

NOTIFICATION_TYPE="$2"
HOST_ALIAS="$3"
DATE="$4"
CONTACT="$5"

message=""

# The values are passed to printf as %s arguments rather than embedded in the
# format, so that backslashes or percent signs coming from the plugin output
# are sent verbatim instead of being interpreted.
if [ "$1" = "host" ]; then
  message=$(printf '***** Naemon *****\n\nNotification Type: %s\n\nHost: %s\nState: %s\nInfo: %s\n\nDate/Time: %s\n' \
    "$NOTIFICATION_TYPE" "$HOST_ALIAS" "$HOSTSTATE" "$HOSTOUTPUT" "$DATE")
  subject="** $NOTIFICATION_TYPE Host Alert: $HOST_ALIAS is $HOSTSTATE **"
else
  message=$(printf '***** Naemon *****\n\nNotification Type: %s\n\nService: %s\nHost: %s\nState: %s\n\nDate/Time: %s\n\nAdditional Info:\n\n%s\n' \
    "$NOTIFICATION_TYPE" "$SERVICEDESC" "$HOST_ALIAS" "$SERVICESTATE" "$DATE" "$SERVICEOUTPUT")
  subject="** $NOTIFICATION_TYPE Service Alert: $HOST_ALIAS/$SERVICEDESC is $SERVICESTATE **"
fi

# MAILRC=/dev/null and -n keep the user and system mail configuration out of
# the way.
printf '%s\n' "$message" | MAILRC=/dev/null mail -r "$ADMINEMAIL" -n -s "$subject" "$CONTACT"
diff --git a/flakes/private/monitoring/plugins/notify_by_slack b/flakes/private/monitoring/plugins/notify_by_slack new file mode 100755 index 0000000..1b16a0d --- /dev/null +++ b/flakes/private/monitoring/plugins/notify_by_slack | |||
@@ -0,0 +1,46 @@ | |||
#!/usr/bin/env bash
#
# Notification command: post a service notification to a Slack webhook.
#
# Arguments:
#   $1  Slack channel
#   $2  webhook URL
#
# Environment (read as-is): HOST, SERVICEDESC, SERVICESTATE, SERVICEOUTPUT.

SLACK_CHANNEL="$1"
SLACK_USERNAME="Naemon"
SLACK_URL="$2"
SLACK_USERICON="https://assets.immae.eu/monitoring.png"

case "$SERVICESTATE" in
  CRITICAL)
    ICON=":exclamation:"
    COLOR="#DA0505"
    ;;
  WARNING)
    ICON=":warning:"
    COLOR="#F1E903"
    ;;
  OK)
    ICON=":white_check_mark:"
    COLOR="#36a64f"
    ;;
  UNKNOWN)
    ICON=":question:"
    COLOR="#000000"
    ;;
  *)
    ICON=":white_medium_square:"
    COLOR="#ffffff"
    ;;
esac

# All values are handed to jq as --arg so that they are JSON-escaped.
payload=$(jq -n -c \
  --arg "icon_url" "$SLACK_USERICON" \
  --arg "channel" "$SLACK_CHANNEL" \
  --arg "username" "$SLACK_USERNAME" \
  --arg "text" "${ICON} ${SERVICEDESC} on ${HOST} is ${SERVICESTATE}" \
  --arg "color" "$COLOR" \
  --arg "host" "$HOST" \
  --arg "desc" "$SERVICEDESC" \
  --arg "state" "$SERVICESTATE" \
  --arg "output" "$SERVICEOUTPUT" \
  '{
    icon_url: $icon_url,
    channel: $channel,
    username: $username,
    text: $text,
    attachments: [{
      fallback: "",
      color: $color,
      fields: [
        {title: "Host", value: $host, short: true},
        {title: "Service description", value: $desc, short: true},
        {title: "Status", value: $state, short: true},
        {title: "Message", value: $output, short: false}
      ]
    }]
  }')

# The payload is sent as a form field: it has to be URL-encoded, otherwise
# any "&", "+" or "%" in the plugin output corrupts the request body.
curl -X POST --data-urlencode "payload=$payload" "$SLACK_URL"
diff --git a/flakes/private/monitoring/plugins/send_nrdp.sh b/flakes/private/monitoring/plugins/send_nrdp.sh new file mode 100755 index 0000000..c83c8cb --- /dev/null +++ b/flakes/private/monitoring/plugins/send_nrdp.sh | |||
@@ -0,0 +1,57 @@ | |||
#!/bin/bash
#
# Submit one passive check result to an NRDP server.
#
# Usage: send_nrdp.sh -u <url> -t <token> -H <hostname> -S <state>
#                     [-s <servicename>] [-o <output>]
#
# The result is submitted as a service check when -s is given, as a host
# check otherwise.
#
# Exit status: 0 on success, 1 on usage or connection error, 2 when the
# server did not answer with the status "ok".

usage() {
  echo "Usage: $0 -u <url> -t <token> -H <hostname> -S <state> [-s <servicename>] [-o <output>]" >&2
}

# jq filter building the request body; the $names are jq variables bound
# with --arg below, which takes care of the JSON escaping.
TEMPLATE='{
  "cmd": "submitcheck",
  "token": $token,
  "checkresult": [{
    "hostname": $hostname,
    "state": $state,
    "output": $output,
    "type": $type,
    "servicename": $servicename
  }]
}'

while getopts "u:t:H:s:S:o:" option
do
  case $option in
    u) url=$OPTARG ;;
    t) token=$OPTARG ;;
    H) hostname=$OPTARG ;;
    s) servicename=$OPTARG ;;
    S) state=$OPTARG ;;
    o) output=$OPTARG ;;
    *)
      usage
      exit 1
      ;;
  esac
done

# Without these the request cannot succeed; fail early with a clear message
# rather than with a misleading connection error.
for required in url token hostname state; do
  if [ -z "${!required}" ]; then
    echo "ERROR: missing required option ($required)"
    usage
    exit 1
  fi
done

if [ -n "$servicename" ]; then
  checktype="service"
else
  checktype="host"
fi

payload=$(jq -n \
  --arg type "$checktype" \
  --arg hostname "$hostname" \
  --arg servicename "$servicename" \
  --arg output "$output" \
  --arg token "$token" \
  --arg state "$state" \
  "$TEMPLATE")

# NOTE(review): --insecure disables TLS certificate verification although
# the request carries the NRDP token; kept as-is, to be confirmed.
rslt=$(curl -f --silent --insecure -d "$payload" -H "Content-Type: application/json" "$url")
ret=$?

if [ $ret != 0 ];then
  echo "ERROR: could not connect to NRDP server at $url"
  exit 1
fi

status=$(echo "$rslt" | jq -r .status)
message=$(echo "$rslt" | jq -r .message)

if [ "$status" != "ok" ];then
  echo "ERROR: The NRDP Server said $message"
  exit 2
fi
echo "Sent 1 checks to $url"