aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ismaël Bouya <ismael.bouya@normalesup.org> 2020-01-11 15:35:24 +0100
committer Ismaël Bouya <ismael.bouya@normalesup.org> 2020-01-11 15:35:24 +0100
commit 258441019881c451686dbe537069228cc8e49612 (patch)
tree 2b788d0ad445608afe01e8801553ffe0a28de047
parent 423c3f1caefdf5f125a2acf7456b5ca0273cee4b (diff)
download Nix-258441019881c451686dbe537069228cc8e49612.tar.gz
Nix-258441019881c451686dbe537069228cc8e49612.tar.zst
Nix-258441019881c451686dbe537069228cc8e49612.zip
Add some monitoring services (eriomem)
-rw-r--r-- modules/private/environment.nix 1
-rw-r--r-- modules/private/monitoring/default.nix 6
-rw-r--r-- modules/private/monitoring/objects_common.nix 1
-rw-r--r-- modules/private/monitoring/objects_immae-eu.nix 23
-rw-r--r-- modules/private/monitoring/objects_monitoring-1.nix 13
-rwxr-xr-x modules/private/monitoring/plugins/check_eriomem 86
6 files changed, 106 insertions, 24 deletions
diff --git a/modules/private/environment.nix b/modules/private/environment.nix
index 50f153a..81b5df5 100644
--- a/modules/private/environment.nix
+++ b/modules/private/environment.nix
@@ -481,6 +481,7 @@ in
481 ssh_secret_key = mkOption { type = str; description = "SSH secret key"; }; 481 ssh_secret_key = mkOption { type = str; description = "SSH secret key"; };
482 imap_login = mkOption { type = str; description = "IMAP login"; }; 482 imap_login = mkOption { type = str; description = "IMAP login"; };
483 imap_password = mkOption { type = str; description = "IMAP password"; }; 483 imap_password = mkOption { type = str; description = "IMAP password"; };
484 eriomem_keys = mkOption { type = listOf (listOf str); description = "Eriomem keys"; default = []; };
484 nrdp_tokens = mkOption { type = listOf str; description = "Tokens allowed to push status update"; }; 485 nrdp_tokens = mkOption { type = listOf str; description = "Tokens allowed to push status update"; };
485 slack_url = mkOption { type = str; description = "Slack webhook url to push status update"; }; 486 slack_url = mkOption { type = str; description = "Slack webhook url to push status update"; };
486 slack_channel = mkOption { type = str; description = "Slack channel to push status update"; }; 487 slack_channel = mkOption { type = str; description = "Slack channel to push status update"; };
diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix
index 2c2f693..e1357a7 100644
--- a/modules/private/monitoring/default.nix
+++ b/modules/private/monitoring/default.nix
@@ -38,6 +38,9 @@ let
38 wrapProgram $out/check_imap_connection --prefix PATH : ${lib.makeBinPath [ 38 wrapProgram $out/check_imap_connection --prefix PATH : ${lib.makeBinPath [
39 pkgs.openssl 39 pkgs.openssl
40 ]} 40 ]}
41 wrapProgram $out/check_eriomem --prefix PATH : ${lib.makeBinPath [
42 pkgs.s3cmd pkgs.python3
43 ]}
41 wrapProgram $out/notify_maison_bbc_by_email --prefix PATH : ${lib.makeBinPath [ 44 wrapProgram $out/notify_maison_bbc_by_email --prefix PATH : ${lib.makeBinPath [
42 pkgs.mailutils pkgs.gawk 45 pkgs.mailutils pkgs.gawk
43 ]} 46 ]}
@@ -55,7 +58,7 @@ let
55 loadWarn = "8.0"; loadAlert = "10.0"; 58 loadWarn = "8.0"; loadAlert = "10.0";
56 }; 59 };
57 backup-2 = { 60 backup-2 = {
58 processWarn = "50"; processAlert = "60"; 61 processWarn = "60"; processAlert = "70";
59 loadWarn = "1.0"; loadAlert = "2.0"; 62 loadWarn = "1.0"; loadAlert = "2.0";
60 }; 63 };
61 monitoring-1 = { 64 monitoring-1 = {
@@ -220,6 +223,7 @@ in
220 $USER205$=${config.myEnv.monitoring.imap_password} 223 $USER205$=${config.myEnv.monitoring.imap_password}
221 $USER206$=${config.myEnv.monitoring.slack_channel} 224 $USER206$=${config.myEnv.monitoring.slack_channel}
222 $USER207$=${config.myEnv.monitoring.slack_url} 225 $USER207$=${config.myEnv.monitoring.slack_url}
226 $USER208$=${builtins.concatStringsSep "," (map (builtins.concatStringsSep ":") config.myEnv.monitoring.eriomem_keys)}
223 ''; 227 '';
224 objectDefs = toObjects commonObjects 228 objectDefs = toObjects commonObjects
225 + toObjects hostObjects 229 + toObjects hostObjects
diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix
index 7467306..15eee97 100644
--- a/modules/private/monitoring/objects_common.nix
+++ b/modules/private/monitoring/objects_common.nix
@@ -91,6 +91,7 @@ in
91 ]; 91 ];
92 command = { 92 command = {
93 check_dns = "$USER1$/check_dns -H $ARG1$ -s $HOSTADDRESS$ $ARG2$"; 93 check_dns = "$USER1$/check_dns -H $ARG1$ -s $HOSTADDRESS$ $ARG2$";
94 check_eriomem = "$USER2$/check_eriomem $USER208$";
94 check_external_dns = "$USER1$/check_dns -H $ARG2$ -s $ARG1$ $ARG3$"; 95 check_external_dns = "$USER1$/check_dns -H $ARG2$ -s $ARG1$ $ARG3$";
95 check_ftp_database = "$USER2$/check_ftp_database"; 96 check_ftp_database = "$USER2$/check_ftp_database";
96 check_git = "$USER2$/check_git $USER203$"; 97 check_git = "$USER2$/check_git $USER203$";
diff --git a/modules/private/monitoring/objects_immae-eu.nix b/modules/private/monitoring/objects_immae-eu.nix
index a1d1adb..a6337e9 100644
--- a/modules/private/monitoring/objects_immae-eu.nix
+++ b/modules/private/monitoring/objects_immae-eu.nix
@@ -65,18 +65,6 @@ in
65 } 65 }
66 66
67 { 67 {
68 service_description = "rsync backup happened not too long ago";
69 servicegroups = "webstatus-backup";
70 }
71
72 {
73 service_description = "eriomem backup is up and not full";
74 freshness_threshold = "10800";
75 notification_interval = "120";
76 servicegroups = "webstatus-backup";
77 }
78
79 {
80 service_description = "postfix service is active"; 68 service_description = "postfix service is active";
81 } 69 }
82 70
@@ -89,11 +77,6 @@ in
89 } 77 }
90 78
91 { 79 {
92 service_description = "httpd service is active";
93 servicegroups = "webstatus-resources";
94 }
95
96 {
97 service_description = "postfix SSL is up to date"; 80 service_description = "postfix SSL is up to date";
98 } 81 }
99 82
@@ -108,11 +91,5 @@ in
108 servicegroups = "webstatus-email"; 91 servicegroups = "webstatus-email";
109 freshness_threshold = "1350"; 92 freshness_threshold = "1350";
110 } 93 }
111
112 #### Web scenarios
113 {
114 service_description = "Default website site is running on ns208507.ip-188-165-209.eu";
115 freshness_threshold = "1800";
116 }
117 ]; 94 ];
118} 95}
diff --git a/modules/private/monitoring/objects_monitoring-1.nix b/modules/private/monitoring/objects_monitoring-1.nix
index a46b684..f69d3ff 100644
--- a/modules/private/monitoring/objects_monitoring-1.nix
+++ b/modules/private/monitoring/objects_monitoring-1.nix
@@ -77,6 +77,19 @@
77 _webstatus_url = "imap.immae.eu"; 77 _webstatus_url = "imap.immae.eu";
78 } 78 }
79 79
80 # Backup services
81 {
82 service_description = "Eriomem backup is up and not full";
83 host_name = "eldiron.immae.eu";
84 use = "external-service";
85 check_command = "check_eriomem";
86
87 check_interval = "120";
88 notification_interval = "120";
89
90 servicegroups = "webstatus-backup";
91 }
92
80 # DNS services 93 # DNS services
81 { 94 {
82 service_description = "eldiron dns is active and authoritative for aten.pro"; 95 service_description = "eldiron dns is active and authoritative for aten.pro";
diff --git a/modules/private/monitoring/plugins/check_eriomem b/modules/private/monitoring/plugins/check_eriomem
new file mode 100755
index 0000000..65ca790
--- /dev/null
+++ b/modules/private/monitoring/plugins/check_eriomem
@@ -0,0 +1,86 @@
1#!/usr/bin/env python
2import os
3import sys
4import getopt
5import signal
6from subprocess import Popen, PIPE
7
# Exit codes handed to sys.exit() through output(); one per plugin state.
STATE_OK = 0
STATE_WARNING = 1
STATE_CRITICAL = 2
STATE_UNKNOWN = 3
12
# The single command-line argument is a comma-separated list of ACCESS:SECRET
# pairs.  When it is missing there is nothing to check: print a status line and
# exit with the UNKNOWN state instead of dying on an IndexError traceback
# (an uncaught exception would exit with code 1, the same value as
# STATE_WARNING).
if len(sys.argv) < 2:
    print("eriomem UNKNOWN - Usage: check_eriomem ACCESS:SECRET[,ACCESS:SECRET...]")
    sys.exit(STATE_UNKNOWN)
keys = sys.argv[1].split(",")
14
def to_args(k):
    """Build the ``s3cmd ... du`` argument list for one key pair.

    ``k`` is an ``ACCESS:SECRET`` string.  It is split on the first colon
    only, so anything after that colon belongs to the secret key.  A string
    without a colon raises ``ValueError`` from the unpacking.
    """
    access, secret = k.split(":", 1)
    base = ["s3cmd", "-c=/dev/null", "--no-check-certificate"]
    credentials = [
        "--access_key={}".format(access),
        "--secret_key={}".format(secret),
    ]
    endpoint = [
        "--host=e.eriomem.net",
        "--host-bucket=%(bucket)s.e.eriomem.net",
    ]
    return base + credentials + endpoint + ["du"]
27
def _args_for_key(index):
    """Return the s3cmd arguments for ``keys[index]``, or None if unavailable."""
    try:
        return to_args(keys[index])
    except (IndexError, ValueError):
        # Fewer keys than expected, or an entry without a colon.
        return None


# Argument vectors for the first two key pairs.  main() rebuilds the arguments
# for every entry of ``keys`` itself, so these must not abort the plugin at
# load time when only one key pair (or a malformed one) was supplied.
ARGS1 = _args_for_key(0)
ARGS2 = _args_for_key(1)
30
# Reference size the summed usage is compared against (1024**4 bytes), and the
# percentages of it above which main() reports WARNING and CRITICAL.
max_size = 1024 ** 4
warning_percent = 99.75
critical_percent = 99.95
34
def output(code, msg):
    """Print ``msg`` on stdout, then leave the process with exit status ``code``."""
    print(msg)
    raise SystemExit(code)
38
def main():
    """Run ``s3cmd du`` for every key pair and report the combined usage.

    One s3cmd process is started per entry of ``keys``.  The sizes they print
    are summed and expressed as a percentage of ``max_size``; that percentage
    is compared against ``critical_percent`` and ``warning_percent``.  Every
    path ends in ``output()``, which prints the status line and exits with the
    matching state, so this function never returns normally.  A 60 second
    alarm bounds the whole run.
    """
    class CommandTimeout(Exception):
        """Raised by the SIGALRM handler once the 60 second budget is spent."""

    def handler(signum, frame):
        # A dedicated exception keeps a real timeout distinguishable from an
        # OSError raised by Popen (for instance when s3cmd is not on PATH).
        raise CommandTimeout()

    def terminate_all(procs):
        # Only signal children that are still running; terminate() sends
        # SIGTERM and is safe to call on a process that has not been reaped.
        for proc in procs:
            if proc.poll() is None:
                proc.terminate()

    def decode(raw):
        # Be lenient: this text only ends up in the status message.
        return raw.decode("utf-8", "replace")

    signal.signal(signal.SIGALRM, handler)
    signal.alarm(60)

    # Filled incrementally so the handlers below always see the processes
    # that were actually started, even if the alarm fires mid-way.
    ps = []
    try:
        for key in keys:
            ps.append(Popen(to_args(key), stdout=PIPE, stderr=PIPE))
        outs = [p.communicate() for p in ps]
        rets = [p.wait() for p in ps]
    except CommandTimeout:
        terminate_all(ps)
        output(STATE_UNKNOWN,
               "eriomem UNKNOWN - Command timeout after 60 seconds!")
    except (OSError, ValueError) as err:
        # OSError: s3cmd could not be started.  ValueError: a key without the
        # ACCESS:SECRET colon was rejected by to_args().
        signal.alarm(0)
        terminate_all(ps)
        output(STATE_UNKNOWN,
               "eriomem UNKNOWN - Could not run s3cmd: %s" % err)

    signal.alarm(0)

    messages = "\n".join([decode(out[0]) + decode(out[1]) for out in outs])

    # Check each return code on its own: summing them would let a negative
    # code (child killed by a signal) cancel out a positive one.
    if any(ret != 0 for ret in rets):
        output(STATE_UNKNOWN,
               "eriomem Unknown - Error in command\n" + messages)

    try:
        # The total is the first field of the last non-empty line of stdout.
        usages = [int(out[0].decode().split("\n")[-2].split()[0]) for out in outs]
    except (IndexError, ValueError):
        output(STATE_UNKNOWN,
               "eriomem UNKNOWN - Unexpected s3cmd output\n" + messages)

    usage = sum(usages)
    use_percent = 100 * usage / max_size
    if use_percent > critical_percent:
        output(STATE_CRITICAL,
               "eriomem CRITICAL - bucket usage: %s (%s%%);| size=%s;;;;" %
               (sizeof_fmt(usage), use_percent, usage))
    elif use_percent > warning_percent:
        output(STATE_WARNING,
               "eriomem WARNING - bucket usage: %s (%s%%);| size=%s;;;;" %
               (sizeof_fmt(usage), use_percent, usage))
    else:
        output(STATE_OK,
               "eriomem OK - bucket usage: %s (%d%%);| size=%s;;;;" %
               (sizeof_fmt(usage), use_percent, usage))
77
def sizeof_fmt(num):
    """Format a byte count with a binary-scaled French unit suffix.

    ``num`` is divided by 1024 until its magnitude drops below 1024, and is
    then rendered with one decimal followed by the matching suffix ('' for
    plain bytes, then ko, Mo, Go, To, Po, Eo, Zo).  Anything that is still
    1024 or more after the last division is expressed in 'Yo'.
    """
    for unit in ['', 'ko', 'Mo', 'Go', 'To', 'Po', 'Eo', 'Zo']:
        if abs(num) < 1024.0:
            return "%3.1f%s" % (num, unit)
        num /= 1024.0
    # Two placeholders for two values: the former "%.1f%s%s" raised TypeError.
    return "%.1f%s" % (num, 'Yo')
84
# Only run the check when executed as a script, not when imported.
if __name__ == "__main__":
    main()