git.immae.eu Git - perso/Immae/Config/Nix.git/commitdiff
Add some monitoring services (eriomem)
author Ismaël Bouya <ismael.bouya@normalesup.org>
Sat, 11 Jan 2020 14:35:24 +0000 (15:35 +0100)
committer Ismaël Bouya <ismael.bouya@normalesup.org>
Sat, 11 Jan 2020 14:35:24 +0000 (15:35 +0100)
modules/private/environment.nix
modules/private/monitoring/default.nix
modules/private/monitoring/objects_common.nix
modules/private/monitoring/objects_immae-eu.nix
modules/private/monitoring/objects_monitoring-1.nix
modules/private/monitoring/plugins/check_eriomem [new file with mode: 0755]

index 50f153ac141616df1ef7959a9eb1c0e920122274..81b5df505ecd3d71fd52b0ede395b2c68ed6c106 100644 (file)
@@ -481,6 +481,7 @@ in
           ssh_secret_key = mkOption { type = str; description = "SSH secret key"; };
           imap_login = mkOption { type = str; description = "IMAP login"; };
           imap_password = mkOption { type = str; description = "IMAP password"; };
+          eriomem_keys = mkOption { type = listOf (listOf str); description = "Eriomem keys"; default = []; };
           nrdp_tokens = mkOption { type = listOf str; description = "Tokens allowed to push status update"; };
           slack_url = mkOption { type = str; description = "Slack webhook url to push status update"; };
           slack_channel = mkOption { type = str; description = "Slack channel to push status update"; };
index 2c2f693a9a0fd64dff62ba53cca937dd1b9d92b8..e1357a75544a73ebc8c55e56f46b32b63af1bd3a 100644 (file)
@@ -38,6 +38,9 @@ let
     wrapProgram $out/check_imap_connection --prefix PATH : ${lib.makeBinPath [
       pkgs.openssl
     ]}
+    wrapProgram $out/check_eriomem --prefix PATH : ${lib.makeBinPath [
+      pkgs.s3cmd pkgs.python3
+    ]}
     wrapProgram $out/notify_maison_bbc_by_email --prefix PATH : ${lib.makeBinPath [
       pkgs.mailutils pkgs.gawk
     ]}
@@ -55,7 +58,7 @@ let
       loadWarn = "8.0"; loadAlert = "10.0";
     };
     backup-2 = {
-      processWarn = "50"; processAlert = "60";
+      processWarn = "60"; processAlert = "70";
       loadWarn = "1.0"; loadAlert = "2.0";
     };
     monitoring-1 = {
@@ -220,6 +223,7 @@ in
         $USER205$=${config.myEnv.monitoring.imap_password}
         $USER206$=${config.myEnv.monitoring.slack_channel}
         $USER207$=${config.myEnv.monitoring.slack_url}
+        $USER208$=${builtins.concatStringsSep "," (map (builtins.concatStringsSep ":") config.myEnv.monitoring.eriomem_keys)}
       '';
       objectDefs = toObjects commonObjects
         + toObjects hostObjects
index 7467306c323926da8d47d7c501df508018062b80..15eee97b732e73acae0c293fbe2052e7125a0de2 100644 (file)
@@ -91,6 +91,7 @@ in
   ];
   command = {
     check_dns = "$USER1$/check_dns -H $ARG1$ -s $HOSTADDRESS$ $ARG2$";
+    check_eriomem = "$USER2$/check_eriomem $USER208$";
     check_external_dns = "$USER1$/check_dns -H $ARG2$ -s $ARG1$ $ARG3$";
     check_ftp_database = "$USER2$/check_ftp_database";
     check_git = "$USER2$/check_git $USER203$";
index a1d1adb74dc842a0a46fe7a7b7443e78674b9e92..a6337e93074c30588f07bfa93d558d74ce674c95 100644 (file)
@@ -64,18 +64,6 @@ in
       notification_interval = "0";
     }
 
-    {
-      service_description = "rsync backup happened not too long ago";
-      servicegroups = "webstatus-backup";
-    }
-
-    {
-      service_description = "eriomem backup is up and not full";
-      freshness_threshold = "10800";
-      notification_interval = "120";
-      servicegroups = "webstatus-backup";
-    }
-
     {
       service_description = "postfix service is active";
     }
@@ -88,11 +76,6 @@ in
       service_description = "sshd service is active";
     }
 
-    {
-      service_description = "httpd service is active";
-      servicegroups = "webstatus-resources";
-    }
-
     {
       service_description = "postfix SSL is up to date";
     }
@@ -108,11 +91,5 @@ in
       servicegroups = "webstatus-email";
       freshness_threshold = "1350";
     }
-
-    #### Web scenarios
-    {
-      service_description = "Default website site is running on ns208507.ip-188-165-209.eu";
-      freshness_threshold = "1800";
-    }
   ];
 }
index a46b684ebc3c25a1a406fd5a87b587bcffdece75..f69d3ffb37268c8039b17e155d3859e13eb74009 100644 (file)
       _webstatus_url = "imap.immae.eu";
     }
 
+    # Backup services
+    {
+      service_description = "Eriomem backup is up and not full";
+      host_name = "eldiron.immae.eu";
+      use = "external-service";
+      check_command = "check_eriomem";
+
+      check_interval = "120";
+      notification_interval = "120";
+
+      servicegroups = "webstatus-backup";
+    }
+
     # DNS services
     {
       service_description = "eldiron dns is active and authoritative for aten.pro";
diff --git a/modules/private/monitoring/plugins/check_eriomem b/modules/private/monitoring/plugins/check_eriomem
new file mode 100755 (executable)
index 0000000..65ca790
--- /dev/null
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+import os
+import sys
+import getopt
+import signal
+from subprocess import Popen, PIPE
+
+STATE_OK = 0  # exit statuses that main() hands to output(), which exits with them
+STATE_WARNING = 1
+STATE_CRITICAL = 2
+STATE_UNKNOWN = 3
+
+keys = sys.argv[1].split(",")  # argv[1]: comma-separated "access:secret" pairs (see to_args)
+
+def to_args(k):
+    """Return the s3cmd ``du`` argv for one "access:secret" credential pair."""
+    access, secret = k.split(":", 1)
+    command = ["s3cmd", '-c=/dev/null', '--no-check-certificate']
+    command.append('--access_key={}'.format(access))
+    command.append('--secret_key={}'.format(secret))
+    command += [
+        '--host=e.eriomem.net',
+        '--host-bucket=%(bucket)s.e.eriomem.net',
+        'du',
+    ]
+    return command
+
+# Capacity the summed usage of all keys is measured against (1024**4, presumably bytes).
+max_size = 1024*1024*1024*1024
+
+# Usage percentages above which main() reports WARNING and CRITICAL respectively.
+warning_percent = 99.75
+critical_percent = 99.95
+
+def output(code, msg):  # Print the status message, then terminate with `code` as exit status.
+    print(msg)
+    raise SystemExit(code)
+
+def main():
+    """Sum `s3cmd du` over all keys, print one status line, exit with its state."""
+    def handler(signum, frame):
+        raise IOError  # message-less, so the except below can tell it is the timeout
+    signal.signal(signal.SIGALRM, handler)
+    signal.alarm(60)
+
+    ps = []  # bound before the try: cleanup must work even if the first Popen fails
+    try:
+        for a in keys:
+            ps.append(Popen(to_args(a), stdout=PIPE, stderr=PIPE))
+        outs = [p.communicate() for p in ps]
+        rets = [p.wait() for p in ps]
+    except IOError as exc:
+        for p in ps:
+            if p.poll() is None:  # only signal children that are still running
+                p.terminate()
+        output(STATE_UNKNOWN, "eriomem UNKNOWN - " +
+                (str(exc) or "Command timeout after 60 seconds!"))
+
+    signal.alarm(0)
+
+    if not any(rets):  # sum() would let a signal exit (-N) cancel an error (+N)
+        usage = sum(int(out[0].decode().split("\n")[-2].split()[0]) for out in outs)
+        use_percent = 100 * usage / max_size
+        detail = (sizeof_fmt(usage), use_percent, usage)
+        if use_percent > critical_percent:
+            output(STATE_CRITICAL,
+                    "eriomem CRITICAL - bucket usage: %s (%s%%);| size=%s;;;;" % detail)
+        elif use_percent > warning_percent:
+            output(STATE_WARNING,
+                    "eriomem WARNING - bucket usage: %s (%s%%);| size=%s;;;;" % detail)
+        else:
+            output(STATE_OK,
+                    "eriomem OK - bucket usage: %s (%d%%);| size=%s;;;;" % detail)
+    else:
+        messages = "\n".join([out[0].decode() + out[1].decode() for out in outs])
+        output(STATE_UNKNOWN, "eriomem Unknown - Error in command\n" + messages)
+
+def sizeof_fmt(num):  # Render `num` with 1024-based unit suffixes ('', ko, Mo, ... Yo).
+    for unit in ['','ko','Mo','Go','To','Po','Eo','Zo']:
+        if abs(num) < 1024.0:
+            return "%3.1f%s" % (num, unit)
+        num /= 1024.0
+    return "%.1f%s" % (num, 'Yo')  # two specifiers for two values; a third raised TypeError
+
+if __name__ == '__main__':
+    main()