diff options
author | Ismaël Bouya <ismael.bouya@normalesup.org> | 2020-07-02 12:56:25 +0200 |
---|---|---|
committer | Ismaël Bouya <ismael.bouya@normalesup.org> | 2020-07-02 12:56:25 +0200 |
commit | 2d7caffb06095924f324870a30b119246c6f9913 (patch) | |
tree | 20d8f910918545489a916f9bbfe6ebfd7b5e5307 /modules | |
parent | 34a1646140866e0dfaa90b6f9e26ff6562a24136 (diff) | |
download | Nix-2d7caffb06095924f324870a30b119246c6f9913.tar.gz Nix-2d7caffb06095924f324870a30b119246c6f9913.tar.zst Nix-2d7caffb06095924f324870a30b119246c6f9913.zip |
Adjust monitoring resources and add bandwidth checks
Diffstat (limited to 'modules')
-rw-r--r-- | modules/private/monitoring/default.nix | 6 | ||||
-rw-r--r-- | modules/private/monitoring/objects_common.nix | 16 | ||||
-rw-r--r-- | modules/private/monitoring/objects_monitoring-1.nix | 6 | ||||
-rwxr-xr-x | modules/private/monitoring/plugins/check_bandwidth | 123 |
4 files changed, 148 insertions, 3 deletions
diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix index 316c2dd..8f8b6c0 100644 --- a/modules/private/monitoring/default.nix +++ b/modules/private/monitoring/default.nix | |||
@@ -70,20 +70,26 @@ let | |||
70 | wrapProgram $out/check_ovh_sms --prefix PATH : ${lib.makeBinPath [ | 70 | wrapProgram $out/check_ovh_sms --prefix PATH : ${lib.makeBinPath [ |
71 | (pkgs.python3.withPackages (ps: [ps.ovh])) | 71 | (pkgs.python3.withPackages (ps: [ps.ovh])) |
72 | ]} | 72 | ]} |
73 | wrapProgram $out/check_bandwidth --prefix PATH : ${lib.makeBinPath [ | ||
74 | pkgs.iproute pkgs.bc | ||
75 | ]} | ||
73 | ''; | 76 | ''; |
74 | toObjects = pkgs.callPackage ./to_objects.nix {}; | 77 | toObjects = pkgs.callPackage ./to_objects.nix {}; |
75 | commonConfig = { | 78 | commonConfig = { |
76 | eldiron = { | 79 | eldiron = { |
77 | processWarn = "250"; processAlert = "400"; | 80 | processWarn = "250"; processAlert = "400"; |
78 | loadWarn = "8.0"; loadAlert = "10.0"; | 81 | loadWarn = "8.0"; loadAlert = "10.0"; |
82 | interface = "eth0"; | ||
79 | }; | 83 | }; |
80 | backup-2 = { | 84 | backup-2 = { |
81 | processWarn = "60"; processAlert = "70"; | 85 | processWarn = "60"; processAlert = "70"; |
82 | loadWarn = "1.0"; loadAlert = "2.0"; | 86 | loadWarn = "1.0"; loadAlert = "2.0"; |
87 | interface = "ens3"; | ||
83 | }; | 88 | }; |
84 | monitoring-1 = { | 89 | monitoring-1 = { |
85 | processWarn = "50"; processAlert = "60"; | 90 | processWarn = "50"; processAlert = "60"; |
86 | loadWarn = "1.0"; loadAlert = "2.0"; | 91 | loadWarn = "1.0"; loadAlert = "2.0"; |
92 | interface = "ens3"; | ||
87 | }; | 93 | }; |
88 | }; | 94 | }; |
89 | masterPassiveObjects = let | 95 | masterPassiveObjects = let |
diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix index c0a17e6..d3a46ce 100644 --- a/modules/private/monitoring/objects_common.nix +++ b/modules/private/monitoring/objects_common.nix | |||
@@ -1,5 +1,6 @@ | |||
1 | { hostFQDN | 1 | { hostFQDN |
2 | , hostName | 2 | , hostName |
3 | , interface ? "eth0" | ||
3 | , processWarn ? "250" | 4 | , processWarn ? "250" |
4 | , processAlert ? "400" | 5 | , processAlert ? "400" |
5 | , loadWarn ? "8.0" | 6 | , loadWarn ? "8.0" |
@@ -51,6 +52,20 @@ in | |||
51 | } | 52 | } |
52 | { | 53 | { |
53 | passiveInfo = defaultPassiveInfo; | 54 | passiveInfo = defaultPassiveInfo; |
55 | service_description = "Network bandwidth"; | ||
56 | use = "local-service"; | ||
57 | check_interval = "2"; | ||
58 | max_check_attempts = "20"; | ||
59 | retry_interval = "2"; | ||
60 | check_command = [ | ||
61 | "check_local_bandwidth" | ||
62 | interface | ||
63 | "20480" # kb/s | ||
64 | "51200" # kb/s | ||
65 | ]; | ||
66 | } | ||
67 | { | ||
68 | passiveInfo = defaultPassiveInfo; | ||
54 | service_description = "Average load"; | 69 | service_description = "Average load"; |
55 | use = "local-service"; | 70 | use = "local-service"; |
56 | check_command = [ | 71 | check_command = [ |
@@ -108,6 +123,7 @@ in | |||
108 | check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$"; | 123 | check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$"; |
109 | check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$"; | 124 | check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$"; |
110 | check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$"; | 125 | check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$"; |
126 | check_local_bandwidth = "$USER2$/check_bandwidth -i=$ARG1$ -w $ARG2$ -c $ARG3$"; | ||
111 | check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$"; | 127 | check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$"; |
112 | check_command_match = "$USER2$/check_command -c \"$ARG1$\" -C \"$ARG2$\" $ARG3$"; | 128 | check_command_match = "$USER2$/check_command -c \"$ARG1$\" -C \"$ARG2$\" $ARG3$"; |
113 | check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; | 129 | check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; |
diff --git a/modules/private/monitoring/objects_monitoring-1.nix b/modules/private/monitoring/objects_monitoring-1.nix index 02870ed..320f6e3 100644 --- a/modules/private/monitoring/objects_monitoring-1.nix +++ b/modules/private/monitoring/objects_monitoring-1.nix | |||
@@ -15,7 +15,7 @@ let | |||
15 | check_command = ["check_eriomem_age" name]; | 15 | check_command = ["check_eriomem_age" name]; |
16 | 16 | ||
17 | check_interval = "120"; | 17 | check_interval = "120"; |
18 | notification_interval = "120"; | 18 | notification_interval = "1440"; |
19 | }; | 19 | }; |
20 | in | 20 | in |
21 | { | 21 | { |
@@ -106,7 +106,7 @@ in | |||
106 | check_command = "check_ovh_sms"; | 106 | check_command = "check_ovh_sms"; |
107 | 107 | ||
108 | check_interval = "120"; | 108 | check_interval = "120"; |
109 | notification_interval = "120"; | 109 | notification_interval = "1440"; |
110 | } | 110 | } |
111 | 111 | ||
112 | # Backup services | 112 | # Backup services |
@@ -117,7 +117,7 @@ in | |||
117 | check_command = "check_eriomem"; | 117 | check_command = "check_eriomem"; |
118 | 118 | ||
119 | check_interval = "120"; | 119 | check_interval = "120"; |
120 | notification_interval = "120"; | 120 | notification_interval = "1440"; |
121 | 121 | ||
122 | servicegroups = "webstatus-backup"; | 122 | servicegroups = "webstatus-backup"; |
123 | } | 123 | } |
diff --git a/modules/private/monitoring/plugins/check_bandwidth b/modules/private/monitoring/plugins/check_bandwidth new file mode 100755 index 0000000..53c5d85 --- /dev/null +++ b/modules/private/monitoring/plugins/check_bandwidth | |||
@@ -0,0 +1,123 @@ | |||
1 | #!/bin/bash | ||
2 | |||
3 | # ============================== SUMMARY ===================================== | ||
4 | #Author : Ken Roulamellah | ||
5 | #Date : 19/07/2018 | ||
6 | #Version : 1.0 | ||
7 | # Licence : GPL | ||
8 | # ===================== INFORMATION ABOUT THIS PLUGIN ======================== | ||
9 | # | ||
10 | # This plugin checks the average RX and TX bandwidth utilisation. It uses | ||
11 | # kilobytes as the unit of measure. | ||
12 | # | ||
13 | # ========================== START OF PROGRAM CODE =========================== | ||
14 | |||
15 | STATE_OK=0 | ||
16 | STATE_WARNING=1 | ||
17 | STATE_CRITICAL=2 | ||
18 | STATE_UNKNOWN=3 | ||
19 | |||
20 | interface=$( ip route | grep default | awk '{print $5}' | head -n1) | ||
21 | function print_usage() | ||
22 | { | ||
23 | echo "Usage :" | ||
24 | echo "$0 [ -i=INTERFACE] [ -ct=COUNT ] -w WARNING -c CRITICAL" | ||
25 | echo "This script calculate the average bandwith usage." | ||
26 | echo "Default values | interface: ${interface}, counter: 10" | ||
27 | } | ||
28 | |||
29 | counter=10 | ||
30 | warning=-1 | ||
31 | critical=-1 | ||
32 | |||
33 | sum_rx=0 | ||
34 | sum_tx=0 | ||
35 | avg_rx= | ||
36 | avg_tx= | ||
37 | i= | ||
38 | |||
39 | |||
40 | if [[ $# -lt 4 ]]; | ||
41 | then | ||
42 | echo "Error: Arguments are missing" | ||
43 | print_usage | ||
44 | exit $STATE_UNKNOWN | ||
45 | fi | ||
46 | |||
47 | while [[ $# -gt 0 ]]; do | ||
48 | case "$1" in | ||
49 | -i=*) | ||
50 | interface="$(cut -d'=' -f2 <<<"$1")" | ||
51 | shift | ||
52 | ;; | ||
53 | -ct=*) | ||
54 | counter="$(cut -d'=' -f2 <<<"$1")" | ||
55 | shift | ||
56 | ;; | ||
57 | -w) | ||
58 | warning=$2 | ||
59 | shift 2 | ||
60 | ;; | ||
61 | -c) | ||
62 | critical=$2 | ||
63 | shift 2 | ||
64 | ;; | ||
65 | *) | ||
66 | printf "\nError: Invalid option '$1'" | ||
67 | print_usage | ||
68 | exit $STATE_UNKNOWN | ||
69 | ;; | ||
70 | esac | ||
71 | done | ||
72 | |||
73 | if [ $warning -lt 0 ] || [ $critical -lt 0 ]; | ||
74 | then | ||
75 | echo "Error: You need to specify a warning and critical treshold" | ||
76 | print_usage | ||
77 | exit $STATE_UNKNOWN | ||
78 | fi | ||
79 | |||
80 | grep -q "up" /sys/class/net/$interface/operstate || exec echo "$interface: no such device or down" | ||
81 | |||
82 | read rx <"/sys/class/net/$interface/statistics/rx_bytes" | ||
83 | read tx <"/sys/class/net/$interface/statistics/tx_bytes" | ||
84 | |||
85 | i=$counter | ||
86 | while [ $i -gt 0 ]; do | ||
87 | sleep 1 | ||
88 | read newrx <"/sys/class/net/$interface/statistics/rx_bytes" | ||
89 | read newtx <"/sys/class/net/$interface/statistics/tx_bytes" | ||
90 | |||
91 | #echo "old rx :$rx" | ||
92 | #echo "new rx :$newrx" | ||
93 | rx_cal=$(bc <<< "scale=2; ($newrx-$rx) / 1000") | ||
94 | tx_cal=$(bc <<< "scale=2; ($newtx-$tx) / 1000") | ||
95 | |||
96 | sum_rx=$(bc <<< "scale=2;$sum_rx+$rx_cal") | ||
97 | sum_tx=$(bc <<< "scale=2;$sum_tx+$tx_cal") | ||
98 | |||
99 | #echo "$interface {rx: $rx_cal ko/s, tx: $tx_cal ko/s}" | ||
100 | rx=$newrx | ||
101 | tx=$newtx | ||
102 | ((i --)) | ||
103 | done | ||
104 | |||
105 | avg_rx=$(bc <<< "scale=2;$sum_rx/$counter") | ||
106 | avg_tx=$(bc <<< "scale=2;$sum_tx/$counter") | ||
107 | |||
108 | #echo "$avg_rx" | ||
109 | #echo "$avg_tx" | ||
110 | |||
111 | |||
112 | if [ $(bc <<< "$avg_rx > $critical || $avg_tx > $critical") -eq 1 ]; then | ||
113 | echo "$interface CRITICAL - AVG_RX: $avg_rx kb/s, AVG_TX: | ||
114 | $avg_tx kb/s | RX="$avg_rx"kbps;0;0;0; TX="$avg_tx"kbps;0;0;0;" | ||
115 | exit $STATE_CRITICAL | ||
116 | elif [ $(bc <<< "$avg_rx > $warning || $avg_tx > $warning") -eq 1 ]; then | ||
117 | echo "$interface WARNING - AVG_RX: $avg_rx kb/s, AVG_TX: $avg_tx kb/s | RX="$avg_rx"kbps;0;0;0; TX="$avg_tx"kbps;0;0;0;" | ||
118 | exit $STATE_WARNING | ||
119 | else | ||
120 | echo "$interface - OK AVG_RX: $avg_rx kb/s, AVG_TX: $avg_tx kb/s | RX="$avg_rx"kbps;0;0;0; TX="$avg_tx"kbps;0;0;0;" | ||
121 | exit $STATE_OK | ||
122 | fi | ||
123 | exit 3 | ||