aboutsummaryrefslogtreecommitdiff
path: root/modules
diff options
context:
space:
mode:
authorIsmaël Bouya <ismael.bouya@normalesup.org>2020-07-02 12:56:25 +0200
committerIsmaël Bouya <ismael.bouya@normalesup.org>2020-07-02 12:56:25 +0200
commit2d7caffb06095924f324870a30b119246c6f9913 (patch)
tree20d8f910918545489a916f9bbfe6ebfd7b5e5307 /modules
parent34a1646140866e0dfaa90b6f9e26ff6562a24136 (diff)
downloadNix-2d7caffb06095924f324870a30b119246c6f9913.tar.gz
Nix-2d7caffb06095924f324870a30b119246c6f9913.tar.zst
Nix-2d7caffb06095924f324870a30b119246c6f9913.zip
Adjust monitoring resources and add bandwidth checks
Diffstat (limited to 'modules')
-rw-r--r--modules/private/monitoring/default.nix6
-rw-r--r--modules/private/monitoring/objects_common.nix16
-rw-r--r--modules/private/monitoring/objects_monitoring-1.nix6
-rwxr-xr-xmodules/private/monitoring/plugins/check_bandwidth123
4 files changed, 148 insertions, 3 deletions
diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix
index 316c2dd..8f8b6c0 100644
--- a/modules/private/monitoring/default.nix
+++ b/modules/private/monitoring/default.nix
@@ -70,20 +70,26 @@ let
70 wrapProgram $out/check_ovh_sms --prefix PATH : ${lib.makeBinPath [ 70 wrapProgram $out/check_ovh_sms --prefix PATH : ${lib.makeBinPath [
71 (pkgs.python3.withPackages (ps: [ps.ovh])) 71 (pkgs.python3.withPackages (ps: [ps.ovh]))
72 ]} 72 ]}
73 wrapProgram $out/check_bandwidth --prefix PATH : ${lib.makeBinPath [
74 pkgs.iproute pkgs.bc
75 ]}
73 ''; 76 '';
74 toObjects = pkgs.callPackage ./to_objects.nix {}; 77 toObjects = pkgs.callPackage ./to_objects.nix {};
75 commonConfig = { 78 commonConfig = {
76 eldiron = { 79 eldiron = {
77 processWarn = "250"; processAlert = "400"; 80 processWarn = "250"; processAlert = "400";
78 loadWarn = "8.0"; loadAlert = "10.0"; 81 loadWarn = "8.0"; loadAlert = "10.0";
82 interface = "eth0";
79 }; 83 };
80 backup-2 = { 84 backup-2 = {
81 processWarn = "60"; processAlert = "70"; 85 processWarn = "60"; processAlert = "70";
82 loadWarn = "1.0"; loadAlert = "2.0"; 86 loadWarn = "1.0"; loadAlert = "2.0";
87 interface = "ens3";
83 }; 88 };
84 monitoring-1 = { 89 monitoring-1 = {
85 processWarn = "50"; processAlert = "60"; 90 processWarn = "50"; processAlert = "60";
86 loadWarn = "1.0"; loadAlert = "2.0"; 91 loadWarn = "1.0"; loadAlert = "2.0";
92 interface = "ens3";
87 }; 93 };
88 }; 94 };
89 masterPassiveObjects = let 95 masterPassiveObjects = let
diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix
index c0a17e6..d3a46ce 100644
--- a/modules/private/monitoring/objects_common.nix
+++ b/modules/private/monitoring/objects_common.nix
@@ -1,5 +1,6 @@
1{ hostFQDN 1{ hostFQDN
2, hostName 2, hostName
3, interface ? "eth0"
3, processWarn ? "250" 4, processWarn ? "250"
4, processAlert ? "400" 5, processAlert ? "400"
5, loadWarn ? "8.0" 6, loadWarn ? "8.0"
@@ -51,6 +52,20 @@ in
51 } 52 }
52 { 53 {
53 passiveInfo = defaultPassiveInfo; 54 passiveInfo = defaultPassiveInfo;
55 service_description = "Network bandwidth";
56 use = "local-service";
57 check_interval = "2";
58 max_check_attempts = "20";
59 retry_interval = "2";
60 check_command = [
61 "check_local_bandwidth"
62 interface
63 "20480" # kb/s
64 "51200" # kb/s
65 ];
66 }
67 {
68 passiveInfo = defaultPassiveInfo;
54 service_description = "Average load"; 69 service_description = "Average load";
55 use = "local-service"; 70 use = "local-service";
56 check_command = [ 71 check_command = [
@@ -108,6 +123,7 @@ in
108 check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$"; 123 check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$";
109 check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$"; 124 check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$";
110 check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$"; 125 check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$";
126 check_local_bandwidth = "$USER2$/check_bandwidth -i=$ARG1$ -w $ARG2$ -c $ARG3$";
111 check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$"; 127 check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
112 check_command_match = "$USER2$/check_command -c \"$ARG1$\" -C \"$ARG2$\" $ARG3$"; 128 check_command_match = "$USER2$/check_command -c \"$ARG1$\" -C \"$ARG2$\" $ARG3$";
113 check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; 129 check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";
diff --git a/modules/private/monitoring/objects_monitoring-1.nix b/modules/private/monitoring/objects_monitoring-1.nix
index 02870ed..320f6e3 100644
--- a/modules/private/monitoring/objects_monitoring-1.nix
+++ b/modules/private/monitoring/objects_monitoring-1.nix
@@ -15,7 +15,7 @@ let
15 check_command = ["check_eriomem_age" name]; 15 check_command = ["check_eriomem_age" name];
16 16
17 check_interval = "120"; 17 check_interval = "120";
18 notification_interval = "120"; 18 notification_interval = "1440";
19 }; 19 };
20in 20in
21{ 21{
@@ -106,7 +106,7 @@ in
106 check_command = "check_ovh_sms"; 106 check_command = "check_ovh_sms";
107 107
108 check_interval = "120"; 108 check_interval = "120";
109 notification_interval = "120"; 109 notification_interval = "1440";
110 } 110 }
111 111
112 # Backup services 112 # Backup services
@@ -117,7 +117,7 @@ in
117 check_command = "check_eriomem"; 117 check_command = "check_eriomem";
118 118
119 check_interval = "120"; 119 check_interval = "120";
120 notification_interval = "120"; 120 notification_interval = "1440";
121 121
122 servicegroups = "webstatus-backup"; 122 servicegroups = "webstatus-backup";
123 } 123 }
diff --git a/modules/private/monitoring/plugins/check_bandwidth b/modules/private/monitoring/plugins/check_bandwidth
new file mode 100755
index 0000000..53c5d85
--- /dev/null
+++ b/modules/private/monitoring/plugins/check_bandwidth
@@ -0,0 +1,123 @@
#!/bin/bash

# ============================== SUMMARY =====================================
# Author  : Ken Roulamellah
# Date    : 19/07/2018
# Version : 1.0
# Licence : GPL
# ===================== INFORMATION ABOUT THIS PLUGIN ========================
#
# This plugin checks the average RX and TX bandwidth utilisation of one
# network interface over a sampling window of COUNT seconds.
#
# Rates are computed from the kernel byte counters as
#   (bytes_end - bytes_start) / (1000 * COUNT)
# i.e. kilobytes (1000 bytes) per second. The -w and -c thresholds are
# compared against that same value.
#
# Exit codes follow the Nagios plugin convention:
#   0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (bad usage / unreadable counters).
#
# ========================== START OF PROGRAM CODE ===========================

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

# Default interface: the device of the first default route. Overridden by -i=.
interface=$(ip route | grep default | awk '{print $5}' | head -n1)

# Print the command-line synopsis and the current default values.
function print_usage()
{
  echo "Usage :"
  echo "$0 [ -i=INTERFACE] [ -ct=COUNT ] -w WARNING -c CRITICAL"
  echo "This script calculate the average bandwith usage."
  echo "Default values | interface: ${interface}, counter: 10"
}

# Report a usage problem ($1 = message), show the synopsis, exit UNKNOWN.
function usage_error()
{
  echo "Error: $1"
  print_usage
  exit $STATE_UNKNOWN
}

# Succeed when $1 is a non-negative decimal number (integer or fractional).
function is_number()
{
  [[ $1 =~ ^[0-9]+([.][0-9]+)?$ ]]
}

counter=10
warning=
critical=

if [[ $# -lt 4 ]]; then
  usage_error "Arguments are missing"
fi

while [[ $# -gt 0 ]]; do
  case "$1" in
    -i=*)
      interface=${1#*=}
      shift
      ;;
    -ct=*)
      counter=${1#*=}
      shift
      ;;
    -w|-c)
      # "shift 2" fails without shifting when only one argument is left,
      # which would make this loop spin forever; reject that case up front.
      [[ $# -ge 2 ]] || usage_error "Option '$1' requires a value"
      if [[ $1 == "-w" ]]; then
        warning=$2
      else
        critical=$2
      fi
      shift 2
      ;;
    *)
      usage_error "Invalid option '$1'"
      ;;
  esac
done

if ! is_number "$warning" || ! is_number "$critical"; then
  usage_error "You need to specify a warning and critical treshold"
fi

# COUNT is both the sleep duration and a divisor, so it must be >= 1.
if ! [[ $counter =~ ^0*[1-9][0-9]*$ ]]; then
  usage_error "COUNT must be a positive integer"
fi

if [[ -z $interface ]]; then
  usage_error "No interface given and no default route found"
fi

sysfs="/sys/class/net/$interface"

# A missing or down interface is a real problem: it must not exit with 0,
# otherwise the monitoring system records the check as OK.
if ! grep -q "up" "$sysfs/operstate" 2>/dev/null; then
  echo "$interface: no such device or down"
  exit $STATE_CRITICAL
fi

rx_start=
tx_start=
rx_end=
tx_end=

read -r rx_start <"$sysfs/statistics/rx_bytes"
read -r tx_start <"$sysfs/statistics/tx_bytes"

# Summing one-second deltas telescopes to (end - start), so a single sleep
# over the whole window gives the same average with far fewer subprocesses.
sleep "$counter"

read -r rx_end <"$sysfs/statistics/rx_bytes"
read -r tx_end <"$sysfs/statistics/tx_bytes"

for sample in "$rx_start" "$tx_start" "$rx_end" "$tx_end"; do
  if ! [[ $sample =~ ^[0-9]+$ ]]; then
    echo "$interface: unable to read interface statistics"
    exit $STATE_UNKNOWN
  fi
done

avg_rx=$(bc <<< "scale=2; ($rx_end - $rx_start) / (1000 * $counter)")
avg_tx=$(bc <<< "scale=2; ($tx_end - $tx_start) / (1000 * $counter)")

# Perfdata layout is label=value[UOM];warn;crit;min;max - expose the real
# thresholds so graphs show them instead of a constant 0.
perfdata="RX=${avg_rx}kbps;${warning};${critical};0; TX=${avg_tx}kbps;${warning};${critical};0;"
rates="AVG_RX: $avg_rx kb/s, AVG_TX: $avg_tx kb/s"

# Status text and perfdata must stay on a single line for Nagios to parse them.
if [ "$(bc <<< "$avg_rx > $critical || $avg_tx > $critical")" -eq 1 ]; then
  echo "$interface CRITICAL - $rates | $perfdata"
  exit $STATE_CRITICAL
elif [ "$(bc <<< "$avg_rx > $warning || $avg_tx > $warning")" -eq 1 ]; then
  echo "$interface WARNING - $rates | $perfdata"
  exit $STATE_WARNING
else
  echo "$interface - OK $rates | $perfdata"
  exit $STATE_OK
fi