]> git.immae.eu Git - perso/Immae/Config/Nix.git/commitdiff
Adjust monitoring resources and add bandwidth checks
authorIsmaël Bouya <ismael.bouya@normalesup.org>
Thu, 2 Jul 2020 10:56:25 +0000 (12:56 +0200)
committerIsmaël Bouya <ismael.bouya@normalesup.org>
Thu, 2 Jul 2020 10:56:25 +0000 (12:56 +0200)
modules/private/monitoring/default.nix
modules/private/monitoring/objects_common.nix
modules/private/monitoring/objects_monitoring-1.nix
modules/private/monitoring/plugins/check_bandwidth [new file with mode: 0755]

index 316c2ddff5dd56688979e7b3bc0d55a06e90ab2c..8f8b6c0d1494adb803a79604565972c4f22ba18a 100644 (file)
@@ -70,20 +70,26 @@ let
     wrapProgram $out/check_ovh_sms --prefix PATH : ${lib.makeBinPath [
       (pkgs.python3.withPackages (ps: [ps.ovh]))
     ]}
+    wrapProgram $out/check_bandwidth --prefix PATH : ${lib.makeBinPath [
+      pkgs.iproute pkgs.bc
+    ]}
     '';
   toObjects = pkgs.callPackage ./to_objects.nix {};
   commonConfig = {
     eldiron = {
       processWarn = "250"; processAlert = "400";
       loadWarn = "8.0"; loadAlert = "10.0";
+      interface = "eth0";
     };
     backup-2 = {
       processWarn = "60"; processAlert = "70";
       loadWarn = "1.0"; loadAlert = "2.0";
+      interface = "ens3";
     };
     monitoring-1 = {
       processWarn = "50"; processAlert = "60";
       loadWarn = "1.0"; loadAlert = "2.0";
+      interface = "ens3";
     };
   };
   masterPassiveObjects = let
index c0a17e66684bd7c75be3ea03f42158fbf8c49233..d3a46ce06a9ac1a6233affcac3b4d995e1a1893d 100644 (file)
@@ -1,5 +1,6 @@
 { hostFQDN
 , hostName
+, interface ? "eth0"
 , processWarn ? "250"
 , processAlert ? "400"
 , loadWarn ? "8.0"
@@ -49,6 +50,20 @@ in
         "RSZDT"
       ];
     }
+    {
+      passiveInfo = defaultPassiveInfo;
+      service_description = "Network bandwidth";
+      use = "local-service";
+      check_interval = "2";
+      max_check_attempts = "20";
+      retry_interval = "2";
+      check_command = [
+        "check_local_bandwidth"
+        interface
+        "20480" # warning threshold (-w) in kilobytes/s: check_bandwidth divides the byte counters by 1000
+        "51200" # critical threshold (-c), same unit (kilobytes/s)
+      ];
+    }
     {
       passiveInfo = defaultPassiveInfo;
       service_description = "Average load";
@@ -108,6 +123,7 @@ in
     check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$";
     check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$";
     check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$";
+    check_local_bandwidth = "$USER2$/check_bandwidth -i=$ARG1$ -w $ARG2$ -c $ARG3$";
     check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
     check_command_match = "$USER2$/check_command -c \"$ARG1$\" -C \"$ARG2$\" $ARG3$";
     check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";
index 02870ed638a64de698db469e6c149a24ea363069..320f6e33fa7c4be2445ac9d2ac7653b560b38dc3 100644 (file)
@@ -15,7 +15,7 @@ let
     check_command = ["check_eriomem_age" name];
 
     check_interval = "120";
-    notification_interval = "120";
+    notification_interval = "1440";
   };
 in
 {
@@ -106,7 +106,7 @@ in
       check_command = "check_ovh_sms";
 
       check_interval = "120";
-      notification_interval = "120";
+      notification_interval = "1440";
     }
 
     # Backup services
@@ -117,7 +117,7 @@ in
       check_command = "check_eriomem";
 
       check_interval = "120";
-      notification_interval = "120";
+      notification_interval = "1440";
 
       servicegroups = "webstatus-backup";
     }
diff --git a/modules/private/monitoring/plugins/check_bandwidth b/modules/private/monitoring/plugins/check_bandwidth
new file mode 100755 (executable)
index 0000000..53c5d85
--- /dev/null
@@ -0,0 +1,123 @@
+#!/bin/bash
+
+# ============================== SUMMARY =====================================
+#Author : Ken Roulamellah
+#Date : 19/07/2018
+#Version : 1.0
+# Licence : GPL
+# ===================== INFORMATION ABOUT THIS PLUGIN ========================
+#
+# This plugin checks the average RX and TX bandwidth utilisation. It uses
+# kilobytes (1000 bytes) per second as its unit of measure.
+#
+# ========================== START OF PROGRAM CODE ===========================
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+# Default interface: the device of the first default route. The device name
+# is the word following "dev" rather than a fixed column, because a route of
+# the form "default dev eth0 ..." has no "via <gateway>" part in front of it.
+# The value can be overridden on the command line with -i=INTERFACE.
+interface=$(ip route | awk '$1 == "default" { for (i = 2; i < NF; i++) if ($i == "dev") { print $(i + 1); exit } }')
+function print_usage()
+{
+  echo "Usage :"
+  echo "$0 [ -i=INTERFACE]  [ -ct=COUNT ] -w WARNING -c CRITICAL"
+  echo "This script calculate the average bandwith usage."
+  echo "Default values | interface: ${interface}, counter: 10"
+}
+
+counter=10
+warning=-1
+critical=-1
+
+sum_rx=0
+sum_tx=0
+avg_rx=
+avg_tx=
+i=
+
+
+if [[ $# -lt 4 ]];
+then
+       echo "Error: Arguments are missing"
+       print_usage
+       exit $STATE_UNKNOWN
+fi
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -i=*)
+            interface="$(cut -d'=' -f2 <<<"$1")"
+            shift
+        ;;
+        -ct=*)
+            counter="$(cut -d'=' -f2 <<<"$1")"
+            shift
+        ;;
+        -w)
+            warning=$2
+            shift 2
+        ;;
+        -c)
+            critical=$2
+            shift 2
+        ;;
+        *)
+            printf "\nError: Invalid option '$1'"
+            print_usage
+            exit $STATE_UNKNOWN
+        ;;
+    esac
+done
+
+if [ $warning -lt 0 ] || [ $critical -lt 0 ];
+then
+       echo "Error: You need to specify a warning and critical treshold"
+       print_usage
+    exit $STATE_UNKNOWN
+fi
+
+grep -q "up" /sys/class/net/$interface/operstate || exec echo "$interface: no such device or down"
+
+# Take $counter one-second samples of the interface byte counters and add up
+# the per-second rates (byte delta / 1000, two decimals) in sum_rx / sum_tx.
+stats_dir="/sys/class/net/$interface/statistics"
+read rx <"$stats_dir/rx_bytes"
+read tx <"$stats_dir/tx_bytes"
+
+i=$counter
+while [ $i -gt 0 ]; do
+    sleep 1
+    read cur_rx <"$stats_dir/rx_bytes"
+    read cur_tx <"$stats_dir/tx_bytes"
+
+    # Rate over the last second, then accumulated for the average.
+    rate_rx=$(bc <<< "scale=2; ($cur_rx-$rx) / 1000")
+    rate_tx=$(bc <<< "scale=2; ($cur_tx-$tx) / 1000")
+    sum_rx=$(bc <<< "scale=2;$sum_rx+$rate_rx")
+    sum_tx=$(bc <<< "scale=2;$sum_tx+$rate_tx")
+
+    # The current reading becomes the baseline of the next sample.
+    rx=$cur_rx
+    tx=$cur_tx
+    (( i -= 1 ))
+done
+
+avg_rx=$(bc <<< "scale=2;$sum_rx/$counter")
+avg_tx=$(bc <<< "scale=2;$sum_tx/$counter")
+
+#echo "$avg_rx"
+#echo "$avg_tx"
+
+
+if [ $(bc <<< "$avg_rx > $critical || $avg_tx > $critical") -eq 1 ]; then
+       echo "$interface CRITICAL - AVG_RX: $avg_rx kb/s,  AVG_TX:
+        $avg_tx kb/s | RX="$avg_rx"kbps;0;0;0; TX="$avg_tx"kbps;0;0;0;"
+       exit $STATE_CRITICAL
+elif [ $(bc <<< "$avg_rx > $warning || $avg_tx > $warning") -eq 1 ]; then
+       echo "$interface WARNING - AVG_RX: $avg_rx kb/s,  AVG_TX: $avg_tx kb/s | RX="$avg_rx"kbps;0;0;0; TX="$avg_tx"kbps;0;0;0;"
+       exit $STATE_WARNING
+else
+       echo "$interface - OK AVG_RX: $avg_rx kb/s,  AVG_TX: $avg_tx kb/s | RX="$avg_rx"kbps;0;0;0; TX="$avg_tx"kbps;0;0;0;"
+       exit $STATE_OK
+fi
+exit 3