diff options
-rw-r--r-- | modules/private/environment.nix | 1 | ||||
-rw-r--r-- | modules/private/monitoring/default.nix | 6 | ||||
-rw-r--r-- | modules/private/monitoring/objects_common.nix | 1 | ||||
-rw-r--r-- | modules/private/monitoring/objects_immae-eu.nix | 23 | ||||
-rw-r--r-- | modules/private/monitoring/objects_monitoring-1.nix | 13 | ||||
-rwxr-xr-x | modules/private/monitoring/plugins/check_eriomem | 86 |
6 files changed, 106 insertions, 24 deletions
diff --git a/modules/private/environment.nix b/modules/private/environment.nix index 50f153a..81b5df5 100644 --- a/modules/private/environment.nix +++ b/modules/private/environment.nix | |||
@@ -481,6 +481,7 @@ in | |||
481 | ssh_secret_key = mkOption { type = str; description = "SSH secret key"; }; | 481 | ssh_secret_key = mkOption { type = str; description = "SSH secret key"; }; |
482 | imap_login = mkOption { type = str; description = "IMAP login"; }; | 482 | imap_login = mkOption { type = str; description = "IMAP login"; }; |
483 | imap_password = mkOption { type = str; description = "IMAP password"; }; | 483 | imap_password = mkOption { type = str; description = "IMAP password"; }; |
484 | eriomem_keys = mkOption { type = listOf (listOf str); description = "Eriomem keys"; default = []; }; | ||
484 | nrdp_tokens = mkOption { type = listOf str; description = "Tokens allowed to push status update"; }; | 485 | nrdp_tokens = mkOption { type = listOf str; description = "Tokens allowed to push status update"; }; |
485 | slack_url = mkOption { type = str; description = "Slack webhook url to push status update"; }; | 486 | slack_url = mkOption { type = str; description = "Slack webhook url to push status update"; }; |
486 | slack_channel = mkOption { type = str; description = "Slack channel to push status update"; }; | 487 | slack_channel = mkOption { type = str; description = "Slack channel to push status update"; }; |
diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix index 2c2f693..e1357a7 100644 --- a/modules/private/monitoring/default.nix +++ b/modules/private/monitoring/default.nix | |||
@@ -38,6 +38,9 @@ let | |||
38 | wrapProgram $out/check_imap_connection --prefix PATH : ${lib.makeBinPath [ | 38 | wrapProgram $out/check_imap_connection --prefix PATH : ${lib.makeBinPath [ |
39 | pkgs.openssl | 39 | pkgs.openssl |
40 | ]} | 40 | ]} |
41 | wrapProgram $out/check_eriomem --prefix PATH : ${lib.makeBinPath [ | ||
42 | pkgs.s3cmd pkgs.python3 | ||
43 | ]} | ||
41 | wrapProgram $out/notify_maison_bbc_by_email --prefix PATH : ${lib.makeBinPath [ | 44 | wrapProgram $out/notify_maison_bbc_by_email --prefix PATH : ${lib.makeBinPath [ |
42 | pkgs.mailutils pkgs.gawk | 45 | pkgs.mailutils pkgs.gawk |
43 | ]} | 46 | ]} |
@@ -55,7 +58,7 @@ let | |||
55 | loadWarn = "8.0"; loadAlert = "10.0"; | 58 | loadWarn = "8.0"; loadAlert = "10.0"; |
56 | }; | 59 | }; |
57 | backup-2 = { | 60 | backup-2 = { |
58 | processWarn = "50"; processAlert = "60"; | 61 | processWarn = "60"; processAlert = "70"; |
59 | loadWarn = "1.0"; loadAlert = "2.0"; | 62 | loadWarn = "1.0"; loadAlert = "2.0"; |
60 | }; | 63 | }; |
61 | monitoring-1 = { | 64 | monitoring-1 = { |
@@ -220,6 +223,7 @@ in | |||
220 | $USER205$=${config.myEnv.monitoring.imap_password} | 223 | $USER205$=${config.myEnv.monitoring.imap_password} |
221 | $USER206$=${config.myEnv.monitoring.slack_channel} | 224 | $USER206$=${config.myEnv.monitoring.slack_channel} |
222 | $USER207$=${config.myEnv.monitoring.slack_url} | 225 | $USER207$=${config.myEnv.monitoring.slack_url} |
226 | $USER208$=${builtins.concatStringsSep "," (map (builtins.concatStringsSep ":") config.myEnv.monitoring.eriomem_keys)} | ||
223 | ''; | 227 | ''; |
224 | objectDefs = toObjects commonObjects | 228 | objectDefs = toObjects commonObjects |
225 | + toObjects hostObjects | 229 | + toObjects hostObjects |
diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix index 7467306..15eee97 100644 --- a/modules/private/monitoring/objects_common.nix +++ b/modules/private/monitoring/objects_common.nix | |||
@@ -91,6 +91,7 @@ in | |||
91 | ]; | 91 | ]; |
92 | command = { | 92 | command = { |
93 | check_dns = "$USER1$/check_dns -H $ARG1$ -s $HOSTADDRESS$ $ARG2$"; | 93 | check_dns = "$USER1$/check_dns -H $ARG1$ -s $HOSTADDRESS$ $ARG2$"; |
94 | check_eriomem = "$USER2$/check_eriomem $USER208$"; | ||
94 | check_external_dns = "$USER1$/check_dns -H $ARG2$ -s $ARG1$ $ARG3$"; | 95 | check_external_dns = "$USER1$/check_dns -H $ARG2$ -s $ARG1$ $ARG3$"; |
95 | check_ftp_database = "$USER2$/check_ftp_database"; | 96 | check_ftp_database = "$USER2$/check_ftp_database"; |
96 | check_git = "$USER2$/check_git $USER203$"; | 97 | check_git = "$USER2$/check_git $USER203$"; |
diff --git a/modules/private/monitoring/objects_immae-eu.nix b/modules/private/monitoring/objects_immae-eu.nix index a1d1adb..a6337e9 100644 --- a/modules/private/monitoring/objects_immae-eu.nix +++ b/modules/private/monitoring/objects_immae-eu.nix | |||
@@ -65,18 +65,6 @@ in | |||
65 | } | 65 | } |
66 | 66 | ||
67 | { | 67 | { |
68 | service_description = "rsync backup happened not too long ago"; | ||
69 | servicegroups = "webstatus-backup"; | ||
70 | } | ||
71 | |||
72 | { | ||
73 | service_description = "eriomem backup is up and not full"; | ||
74 | freshness_threshold = "10800"; | ||
75 | notification_interval = "120"; | ||
76 | servicegroups = "webstatus-backup"; | ||
77 | } | ||
78 | |||
79 | { | ||
80 | service_description = "postfix service is active"; | 68 | service_description = "postfix service is active"; |
81 | } | 69 | } |
82 | 70 | ||
@@ -89,11 +77,6 @@ in | |||
89 | } | 77 | } |
90 | 78 | ||
91 | { | 79 | { |
92 | service_description = "httpd service is active"; | ||
93 | servicegroups = "webstatus-resources"; | ||
94 | } | ||
95 | |||
96 | { | ||
97 | service_description = "postfix SSL is up to date"; | 80 | service_description = "postfix SSL is up to date"; |
98 | } | 81 | } |
99 | 82 | ||
@@ -108,11 +91,5 @@ in | |||
108 | servicegroups = "webstatus-email"; | 91 | servicegroups = "webstatus-email"; |
109 | freshness_threshold = "1350"; | 92 | freshness_threshold = "1350"; |
110 | } | 93 | } |
111 | |||
112 | #### Web scenarios | ||
113 | { | ||
114 | service_description = "Default website site is running on ns208507.ip-188-165-209.eu"; | ||
115 | freshness_threshold = "1800"; | ||
116 | } | ||
117 | ]; | 94 | ]; |
118 | } | 95 | } |
diff --git a/modules/private/monitoring/objects_monitoring-1.nix b/modules/private/monitoring/objects_monitoring-1.nix index a46b684..f69d3ff 100644 --- a/modules/private/monitoring/objects_monitoring-1.nix +++ b/modules/private/monitoring/objects_monitoring-1.nix | |||
@@ -77,6 +77,19 @@ | |||
77 | _webstatus_url = "imap.immae.eu"; | 77 | _webstatus_url = "imap.immae.eu"; |
78 | } | 78 | } |
79 | 79 | ||
80 | # Backup services | ||
81 | { | ||
82 | service_description = "Eriomem backup is up and not full"; | ||
83 | host_name = "eldiron.immae.eu"; | ||
84 | use = "external-service"; | ||
85 | check_command = "check_eriomem"; | ||
86 | |||
87 | check_interval = "120"; | ||
88 | notification_interval = "120"; | ||
89 | |||
90 | servicegroups = "webstatus-backup"; | ||
91 | } | ||
92 | |||
80 | # DNS services | 93 | # DNS services |
81 | { | 94 | { |
82 | service_description = "eldiron dns is active and authoritative for aten.pro"; | 95 | service_description = "eldiron dns is active and authoritative for aten.pro"; |
diff --git a/modules/private/monitoring/plugins/check_eriomem b/modules/private/monitoring/plugins/check_eriomem new file mode 100755 index 0000000..65ca790 --- /dev/null +++ b/modules/private/monitoring/plugins/check_eriomem | |||
@@ -0,0 +1,86 @@ | |||
1 | #!/usr/bin/env python | ||
2 | import os | ||
3 | import sys | ||
4 | import getopt | ||
5 | import signal | ||
6 | from subprocess import Popen, PIPE | ||
7 | |||
# Exit statuses, following the Nagios plugin convention.
STATE_OK = 0
STATE_WARNING = 1
STATE_CRITICAL = 2
STATE_UNKNOWN = 3

# The only positional argument is a comma-separated list of ACCESS:SECRET
# pairs. Without this guard a missing or empty argument produced a traceback
# and exit status 1, which the monitoring daemon reads as WARNING rather than
# UNKNOWN.
if len(sys.argv) > 1 and sys.argv[1]:
    keys = sys.argv[1].split(",")
elif __name__ == '__main__':
    print("eriomem UNKNOWN - usage: check_eriomem ACCESS:SECRET[,ACCESS:SECRET...]")
    sys.exit(STATE_UNKNOWN)
else:
    # Imported rather than executed: keep the module importable.
    keys = []
14 | |||
def to_args(k):
    """Build the ``s3cmd ... du`` command line for one key pair.

    :param k: key pair formatted as ``ACCESS:SECRET``; only the first ``:``
        separates the two parts, so the secret may itself contain ``:``.
    :returns: argument list suitable for ``subprocess.Popen``.
    :raises ValueError: if ``k`` contains no ``:`` separator.
    """
    access, sep, secret = k.partition(":")
    if not sep:
        # Deliberately do not echo ``k``: it may hold secret material.
        raise ValueError("malformed Eriomem key, expected ACCESS:SECRET")
    return [
        "s3cmd",
        '-c=/dev/null',
        # NOTE(review): TLS certificate verification is disabled here;
        # confirm this is still required for e.eriomem.net.
        '--no-check-certificate',
        '--access_key={}'.format(access),
        '--secret_key={}'.format(secret),
        '--host=e.eriomem.net',
        '--host-bucket=%(bucket)s.e.eriomem.net',
        'du'
        ]
27 | |||
# The former ARGS1/ARGS2 module-level values were never used, and building the
# second one raised IndexError whenever a single key pair was configured;
# main() builds the command line for each key itself.

# Size, in bytes (1 TiB), against which the summed usage is compared.
max_size = 1024*1024*1024*1024
# Usage percentages above which WARNING / CRITICAL are reported.
warning_percent = 99.75
critical_percent = 99.95
34 | |||
def output(code, msg):
    """Write ``msg`` followed by a newline to stdout, then exit with ``code``."""
    sys.stdout.write("%s\n" % (msg,))
    raise SystemExit(code)
38 | |||
def main():
    """Run ``s3cmd du`` for every configured key pair and report the total.

    One s3cmd process is started per entry of ``keys``; all of them share a
    60 second budget enforced with SIGALRM. The usage figures are summed and
    compared with ``max_size``. The function never returns: every path ends
    in ``output()``, which prints the status line and exits with the matching
    state (OK / WARNING / CRITICAL, or UNKNOWN on any failure).
    """

    class CheckTimeout(Exception):
        """Raised by the SIGALRM handler once the time budget is exhausted."""

    def handler(signum, frame):
        # A dedicated exception type is used because IOError is an alias of
        # OSError in Python 3: raising IOError here made a failing Popen
        # (e.g. s3cmd not found) look like a timeout.
        raise CheckTimeout()

    def stop_children(procs):
        # Only signal children that are still running; signalling an
        # already-reaped pid can raise or hit an unrelated process.
        for proc in procs:
            if proc.poll() is None:
                proc.terminate()

    if not keys:
        # Without this guard an empty key list would report OK with 0 usage.
        output(STATE_UNKNOWN, "eriomem UNKNOWN - No key pair configured")

    signal.signal(signal.SIGALRM, handler)
    signal.alarm(60)

    # Bound before the try block so the except clauses can always iterate it.
    ps = []
    try:
        for a in keys:
            ps.append(Popen(to_args(a), stdout=PIPE, stderr=PIPE))
        outs = [p.communicate() for p in ps]
        rets = [p.wait() for p in ps]
    except CheckTimeout:
        stop_children(ps)
        output(STATE_UNKNOWN,
                "eriomem UNKNOWN - Command timeout after 60 seconds!")
    except (OSError, ValueError) as e:
        # OSError: s3cmd could not be started; ValueError: malformed key.
        signal.alarm(0)
        stop_children(ps)
        output(STATE_UNKNOWN,
                "eriomem UNKNOWN - Unable to run s3cmd: %s" % e)

    signal.alarm(0)

    messages = "\n".join(
        out[0].decode(errors="replace") + out[1].decode(errors="replace")
        for out in outs)

    # all() rather than sum(): a negative return code (child killed by a
    # signal) must not cancel out a positive one.
    if all(ret == 0 for ret in rets):
        try:
            # First field of the last line of the `du` output, presumably the
            # total size in bytes for that key.
            usages = [int(out[0].decode().split("\n")[-2].split()[0]) for out in outs]
        except (IndexError, ValueError):
            output(STATE_UNKNOWN,
                    "eriomem UNKNOWN - Unexpected s3cmd output\n" + messages)
        usage = sum(usages)
        use_percent = 100 * usage / max_size
        if use_percent > critical_percent:
            output(STATE_CRITICAL,
                    "eriomem CRITICAL - bucket usage: %s (%s%%);| size=%s;;;;" %
                    (sizeof_fmt(usage), use_percent, usage))
        elif use_percent > warning_percent:
            output(STATE_WARNING,
                    "eriomem WARNING - bucket usage: %s (%s%%);| size=%s;;;;" %
                    (sizeof_fmt(usage), use_percent, usage))
        else:
            output(STATE_OK,
                    "eriomem OK - bucket usage: %s (%d%%);| size=%s;;;;" %
                    (sizeof_fmt(usage), use_percent, usage))
    else:
        output(STATE_UNKNOWN,
                "eriomem Unknown - Error in command\n" + messages)
77 | |||
def sizeof_fmt(num):
    """Format a byte count with binary (1024-based) French-style units.

    :param num: size in bytes (int or float).
    :returns: string such as ``"1.5Mo"``; values below 1024 carry no unit
        suffix, and values of 1024**8 or more are expressed in ``Yo``.
    """
    for unit in ['','ko','Mo','Go','To','Po','Eo','Zo']:
        if abs(num) < 1024.0:
            return "%3.1f%s" % (num, unit)
        num /= 1024.0
    # The format previously had three placeholders for two arguments and
    # raised TypeError whenever this line was reached.
    return "%.1f%s" % (num, 'Yo')
84 | |||
85 | if __name__ == '__main__': | ||
86 | main() | ||