From: Ismaël Bouya
Date: Thu, 2 Jul 2020 10:56:25 +0000 (+0200)
Subject: Adjust monitoring resources and add bandwidth checks
X-Git-Url: https://git.immae.eu/?p=perso%2FImmae%2FConfig%2FNix.git;a=commitdiff_plain;h=2d7caffb06095924f324870a30b119246c6f9913

Adjust monitoring resources and add bandwidth checks
---

diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix
index 316c2dd..8f8b6c0 100644
--- a/modules/private/monitoring/default.nix
+++ b/modules/private/monitoring/default.nix
@@ -70,20 +70,26 @@ let
       wrapProgram $out/check_ovh_sms --prefix PATH : ${lib.makeBinPath [
         (pkgs.python3.withPackages (ps: [ps.ovh]))
       ]}
+      wrapProgram $out/check_bandwidth --prefix PATH : ${lib.makeBinPath [
+        pkgs.iproute pkgs.bc
+      ]}
     '';
   toObjects = pkgs.callPackage ./to_objects.nix {};
   commonConfig = {
     eldiron = {
       processWarn = "250"; processAlert = "400";
       loadWarn = "8.0"; loadAlert = "10.0";
+      interface = "eth0";
     };
     backup-2 = {
       processWarn = "60"; processAlert = "70";
       loadWarn = "1.0"; loadAlert = "2.0";
+      interface = "ens3";
     };
     monitoring-1 = {
       processWarn = "50"; processAlert = "60";
       loadWarn = "1.0"; loadAlert = "2.0";
+      interface = "ens3";
     };
   };
   masterPassiveObjects = let
diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix
index c0a17e6..d3a46ce 100644
--- a/modules/private/monitoring/objects_common.nix
+++ b/modules/private/monitoring/objects_common.nix
@@ -1,5 +1,6 @@
 { hostFQDN
 , hostName
+, interface ? "eth0"
 , processWarn ? "250"
 , processAlert ? "400"
 , loadWarn ? "8.0"
@@ -49,6 +50,20 @@ in
         "RSZDT"
       ];
     }
+    {
+      passiveInfo = defaultPassiveInfo;
+      service_description = "Network bandwidth";
+      use = "local-service";
+      check_interval = "2";
+      max_check_attempts = "20";
+      retry_interval = "2";
+      check_command = [
+        "check_local_bandwidth"
+        interface
+        "20480" # kb/s
+        "51200" # kb/s
+      ];
+    }
     {
       passiveInfo = defaultPassiveInfo;
       service_description = "Average load";
@@ -108,6 +123,7 @@ in
     check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$";
     check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$";
     check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$";
+    check_local_bandwidth = "$USER2$/check_bandwidth -i=$ARG1$ -w $ARG2$ -c $ARG3$";
     check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
     check_command_match = "$USER2$/check_command -c \"$ARG1$\" -C \"$ARG2$\" $ARG3$";
     check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";
diff --git a/modules/private/monitoring/objects_monitoring-1.nix b/modules/private/monitoring/objects_monitoring-1.nix
index 02870ed..320f6e3 100644
--- a/modules/private/monitoring/objects_monitoring-1.nix
+++ b/modules/private/monitoring/objects_monitoring-1.nix
@@ -15,7 +15,7 @@ let
     check_command = ["check_eriomem_age" name];
 
     check_interval = "120";
-    notification_interval = "120";
+    notification_interval = "1440";
   };
 in
 {
@@ -106,7 +106,7 @@ in
       check_command = "check_ovh_sms";
 
       check_interval = "120";
-      notification_interval = "120";
+      notification_interval = "1440";
     }
 
     # Backup services
@@ -117,7 +117,7 @@ in
       check_command = "check_eriomem";
 
       check_interval = "120";
-      notification_interval = "120";
+      notification_interval = "1440";
 
       servicegroups = "webstatus-backup";
     }
diff --git a/modules/private/monitoring/plugins/check_bandwidth b/modules/private/monitoring/plugins/check_bandwidth
new file mode 100755
index 0000000..53c5d85
--- /dev/null
+++ b/modules/private/monitoring/plugins/check_bandwidth
@@ -0,0 +1,143 @@
+#!/bin/bash
+
+# ============================== SUMMARY =====================================
+#Author : Ken Roulamellah
+#Date : 19/07/2018
+#Version : 1.0
+# Licence : GPL
+# ===================== INFORMATION ABOUT THIS PLUGIN ========================
+#
+# This plugin checks the average RX and TX bandwidth utilisation of a
+# network interface. Rates are computed from the rx_bytes/tx_bytes counters
+# under /sys/class/net/INTERFACE/statistics, sampled once per second for
+# COUNT seconds, and reported in units of 1000 bytes per second.
+#
+# Exit status: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.
+#
+# ========================== START OF PROGRAM CODE ===========================
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+# Default interface: the device of the first default route. The name is
+# located through its "dev" keyword because its position on the line varies
+# ("default via GW dev eth0" versus "default dev ppp0").
+interface=$(ip route show default 2>/dev/null |
+    awk '{ for (n = 1; n < NF; n++) if ($n == "dev") { print $(n + 1); exit } }')
+
+function print_usage()
+{
+    echo "Usage :"
+    echo "$0 [ -i=INTERFACE] [ -ct=COUNT ] -w WARNING -c CRITICAL"
+    echo "This script calculate the average bandwith usage."
+    echo "Default values | interface: ${interface}, counter: 10"
+}
+
+# Report an argument error together with the usage text and exit UNKNOWN.
+function usage_error()
+{
+    echo "Error: $1"
+    print_usage
+    exit $STATE_UNKNOWN
+}
+
+counter=10
+warning=-1
+critical=-1
+
+if [[ $# -lt 4 ]]; then
+    usage_error "Arguments are missing"
+fi
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -i=*)
+            interface="${1#*=}"
+            shift
+            ;;
+        -ct=*)
+            counter="${1#*=}"
+            shift
+            ;;
+        -w|-c)
+            # "shift 2" fails without shifting anything when the value is
+            # missing, which would make this loop spin forever.
+            [[ $# -ge 2 ]] || usage_error "Option '$1' requires a value"
+            if [[ $1 == -w ]]; then warning=$2; else critical=$2; fi
+            shift 2
+            ;;
+        *)
+            usage_error "Invalid option '$1'"
+            ;;
+    esac
+done
+
+# Unset (-1) or non-numeric thresholds are rejected here: bc would otherwise
+# fail on them later and the check would fall through to the OK branch.
+number_re='^[0-9]+([.][0-9]+)?$'
+if ! [[ $warning =~ $number_re && $critical =~ $number_re ]]; then
+    usage_error "You need to specify a warning and critical treshold"
+fi
+
+# COUNT is the divisor of the average, so it must be a positive integer.
+if ! [[ $counter =~ ^[1-9][0-9]*$ ]]; then
+    usage_error "COUNT must be a positive integer"
+fi
+
+# A missing or down interface must not be reported with exit status 0.
+operstate="/sys/class/net/$interface/operstate"
+if [[ ! -r $operstate ]]; then
+    echo "$interface: no such device"
+    exit $STATE_UNKNOWN
+fi
+if ! grep -q "up" "$operstate"; then
+    echo "$interface: device is down"
+    exit $STATE_CRITICAL
+fi
+
+stats="/sys/class/net/$interface/statistics"
+read -r rx <"$stats/rx_bytes"
+read -r tx <"$stats/tx_bytes"
+
+sum_rx=0
+sum_tx=0
+for ((i = counter; i > 0; i--)); do
+    sleep 1
+    read -r newrx <"$stats/rx_bytes"
+    read -r newtx <"$stats/tx_bytes"
+
+    rx_cal=$(bc <<< "scale=2; ($newrx-$rx) / 1000")
+    tx_cal=$(bc <<< "scale=2; ($newtx-$tx) / 1000")
+
+    sum_rx=$(bc <<< "scale=2;$sum_rx+$rx_cal")
+    sum_tx=$(bc <<< "scale=2;$sum_tx+$tx_cal")
+
+    rx=$newrx
+    tx=$newtx
+done
+
+avg_rx=$(bc <<< "scale=2;$sum_rx/$counter")
+avg_tx=$(bc <<< "scale=2;$sum_tx/$counter")
+
+if [[ -z $avg_rx || -z $avg_tx ]]; then
+    echo "$interface: unable to compute the average bandwidth"
+    exit $STATE_UNKNOWN
+fi
+
+# Perfdata fields are value;warn;crit;min, so graphs see the real thresholds.
+summary="AVG_RX: $avg_rx kb/s, AVG_TX: $avg_tx kb/s"
+perfdata="RX=${avg_rx}kbps;${warning};${critical};0; TX=${avg_tx}kbps;${warning};${critical};0;"
+
+# The whole status, perfdata included, has to stay on one output line.
+if [ "$(bc <<< "$avg_rx > $critical || $avg_tx > $critical")" -eq 1 ]; then
+    echo "$interface CRITICAL - $summary | $perfdata"
+    exit $STATE_CRITICAL
+elif [ "$(bc <<< "$avg_rx > $warning || $avg_tx > $warning")" -eq 1 ]; then
+    echo "$interface WARNING - $summary | $perfdata"
+    exit $STATE_WARNING
+else
+    echo "$interface - OK $summary | $perfdata"
+    exit $STATE_OK
+fi