From f46b2c61a7a6c7c494f801002ddcf73fcc53fee4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Isma=C3=ABl=20Bouya?= Date: Sun, 2 May 2021 15:35:50 +0200 Subject: [PATCH] Add snapshot date check for monitoring --- modules/private/monitoring/myplugins.nix | 6 + modules/private/monitoring/objects_dilion.nix | 9 + .../monitoring/plugins/check_zfs_snapshot | 325 ++++++++++++++++++ 3 files changed, 340 insertions(+) create mode 100755 modules/private/monitoring/plugins/check_zfs_snapshot diff --git a/modules/private/monitoring/myplugins.nix b/modules/private/monitoring/myplugins.nix index 86b5f1e..e59ddc4 100644 --- a/modules/private/monitoring/myplugins.nix +++ b/modules/private/monitoring/myplugins.nix @@ -365,6 +365,7 @@ in zfs = { commands = { check_zfs = "$USER2$/check_zpool.sh -p ALL -w 80 -c 90"; + check_zfs_snapshot = "$USER2$/check_zfs_snapshot -d $ARG1$ -c 18000 -w 14400"; }; chunk = let zfsPlugin = pkgs.fetchurl { @@ -378,6 +379,11 @@ in wrapProgram $out/check_zpool.sh --prefix PATH : ${lib.makeBinPath [ pkgs.which pkgs.zfs pkgs.gawk ]} + cp ${./plugins}/check_zfs_snapshot $out + patchShebangs $out/check_zfs_snapshot + wrapProgram $out/check_zfs_snapshot --prefix PATH : ${lib.makeBinPath [ + pkgs.zfs pkgs.coreutils pkgs.gawk pkgs.gnugrep + ]} ''; }; } diff --git a/modules/private/monitoring/objects_dilion.nix b/modules/private/monitoring/objects_dilion.nix index 1baaf39..16b3c64 100644 --- a/modules/private/monitoring/objects_dilion.nix +++ b/modules/private/monitoring/objects_dilion.nix @@ -9,6 +9,12 @@ let servicegroups = "webstatus-resources"; host_name = hostFQDN; }; + zfs_snapshot = name: { + passiveInfo = defaultPassiveInfo // { servicegroups = "webstatus-resources"; }; + service_description = "ZFS snapshot ${name} happened not too long ago"; + use = "local-service"; + check_command = ["check_zfs_snapshot" name]; + }; in { activatedPlugins = [ "zfs" ]; @@ -19,5 +25,8 @@ in use = "local-service"; check_command = ["check_zfs"]; } + (zfs_snapshot 
"zpool/backup/eldiron/zpool/root") + (zfs_snapshot "zpool/backup/eldiron/zpool/root/etc") + (zfs_snapshot "zpool/backup/eldiron/zpool/root/var") ]; } diff --git a/modules/private/monitoring/plugins/check_zfs_snapshot b/modules/private/monitoring/plugins/check_zfs_snapshot new file mode 100755 index 0000000..56f8c4f --- /dev/null +++ b/modules/private/monitoring/plugins/check_zfs_snapshot @@ -0,0 +1,325 @@ +#! /bin/sh + +OS=$(uname) + +# MIT License +# +# Copyright (c) 2016 Josef Friedrich +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +######################################################################## +# Date functions +######################################################################## + +# This date function must be placed on the top of this file because +# they are used in some global variables. + +# to_year ### + +## +# Get the four digit year integer from now. +# +# Return: +# The current 4 digit year. 
+##
+_now_to_year() {
+	date +%Y
+}
+
+##
+# Convert a date in the format YYYY-MM-DD to a four digit year integer.
+#
+# Parameters:
+#   a date in the format YYYY-MM-DD
+#
+# Return:
+#   four digit year integer
+##
+_date_to_year() {
+	# GNU date (Linux) and BSD date (FreeBSD, Darwin) parse dates differently.
+	if [ "$OS" = 'Linux' ]; then
+		date --date "$1" +%Y
+	else
+		date -j -f %Y-%m-%d "$1" +%Y
+	fi
+}
+
+# to_datetime ###
+
+##
+# Convert a UNIX timestamp to a datetime string.
+#
+# Parameters:
+#   UNIX timestamp
+#
+# Return:
+#   %Y-%m-%d.%H:%M:%S
+##
+_timestamp_to_datetime() {
+	# GNU date takes "@<timestamp>", BSD date needs an explicit input format.
+	if [ "$OS" = 'Linux' ]; then
+		date --date "@$1" +%Y-%m-%d.%H:%M:%S
+	else
+		date -j -f %s "$1" +%Y-%m-%d.%H:%M:%S
+	fi
+}
+
+# to_timestamp ###
+
+##
+# Get the current UNIX timestamp.
+#
+# Return:
+#   The current UNIX timestamp
+##
+_now_to_timestamp() {
+	date +%s
+}
+
+PROJECT_PAGES='https://github.com/Josef-Friedrich/check_zfs_snapshot
+https://exchange.icinga.com/joseffriedrich/check_zfs_snapshot
+https://exchange.nagios.org/directory/Plugins/System-Metrics/File-System/check_zfs_snapshot/details'
+
+VERSION=1.2
+FIRST_RELEASE=2016-09-08
+SHORT_DESCRIPTION="Monitoring plugin to check how long ago the last \
+snapshot of a ZFS dataset was created."
+USAGE="check_zfs_snapshot v$VERSION
+Copyright (c) $(_date_to_year $FIRST_RELEASE)-$(_now_to_year) \
+Josef Friedrich
+
+$SHORT_DESCRIPTION
+
+
+Usage: check_zfs_snapshot <options>
+
+Options:
+	-c, --critical=OPT_CRITICAL
+		Interval in seconds for critical state.
+	-d, --dataset=OPT_DATASET
+		The ZFS dataset to check.
+	-h, --help
+		Show this help.
+	-s, --short-description
+		Show a short description of the command.
+	-v, --version
+		Show the version number.
+	-w, --warning=OPT_WARNING
+		Interval in seconds for warning state. Must be lower than -c
+
+Performance data:
+	- last_ago
+		Time interval in seconds for last snapshot. Carries the
+		warning and critical intervals (in seconds) as thresholds.
+	- count
+		How many snapshots exist in the given dataset and all its
+		child datasets.
+"
+
+# Exit codes
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+##
+# Get the creation time of the newest snapshot of a dataset. The
+# snapshots of all child datasets are taken into account (zfs get -r).
+#
+# Parameters:
+#   the ZFS dataset
+#
+# Return:
+#   creation time as UNIX timestamp, 0 if no snapshot was found
+##
+_get_last_snapshot() {
+	zfs get creation -Hpr -t snapshot "$1" | \
+		awk 'BEGIN {max = 0} {if ($3>max) max=$3} END {print max}'
+}
+
+##
+# Parse the command line options into OPT_CRITICAL, OPT_DATASET and
+# OPT_WARNING. Long options arrive through the "-" pseudo option of
+# getopts, with the text after "--" in OPTARG.
+#
+# Parameters:
+#   the command line arguments of the script
+#
+# Exit codes:
+#   0 after printing the help, short description or version
+#   2 invalid option
+#   3 missing option argument
+#   4 argument given to an option that takes none
+##
+_getopts() {
+	# The leading ":" of the option string makes getopts report unknown
+	# options as "?" and missing option arguments as ":".
+	while getopts ':c:d:hsvw:-:' OPT ; do
+		case $OPT in
+			# options taking a value
+			c) OPT_CRITICAL=$OPTARG ;;
+			d) OPT_DATASET="$OPTARG" ;;
+			w) OPT_WARNING=$OPTARG ;;
+
+			# options printing some text and exiting
+			h) echo "$USAGE"; exit 0 ;;
+			s) echo "$SHORT_DESCRIPTION"; exit 0 ;;
+			v) echo "$VERSION"; exit 0 ;;
+
+			\?)
+				echo "Invalid option “-$OPTARG”!" >&2
+				exit 2
+				;;
+
+			:)
+				echo "Option “-$OPTARG” requires an argument!" >&2
+				exit 3
+				;;
+
+			-)
+				# "name=value" -> "value"
+				LONG_OPTARG="${OPTARG#*=}"
+
+				case $OPTARG in
+					# options taking a value
+					critical=?*) OPT_CRITICAL=$LONG_OPTARG ;;
+					dataset=?*) OPT_DATASET="$LONG_OPTARG" ;;
+					warning=?*) OPT_WARNING=$LONG_OPTARG ;;
+
+					# options printing some text and exiting
+					help) echo "$USAGE"; exit 0 ;;
+					short-description) echo "$SHORT_DESCRIPTION"; exit 0 ;;
+					version) echo "$VERSION"; exit 0 ;;
+
+					critical*|dataset*|warning*)
+						echo "Option “--$OPTARG” requires an argument!" >&2
+						exit 3
+						;;
+
+					help*|short-description*|version*)
+						echo "No argument allowed for the option “--$OPTARG”!" >&2
+						exit 4
+						;;
+
+					'') # "--" terminates argument processing
+						break
+						;;
+
+					*)
+						echo "Invalid option “--$OPTARG”!" >&2
+						exit 2
+						;;
+				esac
+				;;
+		esac
+	done
+}
+
+##
+# Count the snapshots of a dataset and of all its child datasets.
+#
+# Parameters:
+#   the ZFS dataset
+#
+# Return:
+#   number of snapshots
+##
+_snapshot_count() {
+	local COUNT
+	# Ask zfs for exactly the snapshots below the dataset (-r) instead of
+	# grepping all snapshots of the host, which also counted datasets
+	# whose name merely contains the given name.
+	COUNT="$(zfs list -H -o name -t snapshot -r "$1" | wc -l)"
+	# FreeBSD wc pads the number with whitespace; the unquoted expansion
+	# strips it.
+	echo $COUNT
+}
+
+##
+# Build the performance data part of the plugin output from the global
+# variables DIFF, OPT_WARNING, OPT_CRITICAL and OPT_DATASET.
+#
+# Return:
+#   "| last_ago=<seconds>s;<warning>;<critical>;0 count=<snapshots>;;;0"
+##
+_performance_data() {
+	echo "| \
+last_ago=${DIFF}s;$OPT_WARNING;$OPT_CRITICAL;0 \
+count=$(_snapshot_count "$OPT_DATASET");;;0\
+"
+}
+
+## This SEPARATOR is required for test purposes. Please don’t remove!
##
+
+# Quote "$@" so that arguments containing whitespace are not re-split.
+_getopts "$@"
+
+# Default thresholds: warning after 1 day, critical after 3 days.
+[ -n "$OPT_WARNING" ] || OPT_WARNING=86400
+[ -n "$OPT_CRITICAL" ] || OPT_CRITICAL=259200
+
+if [ -z "$OPT_DATASET" ]; then
+	echo "Dataset has to be set! Use option -d <dataset>" >&2
+	echo "$USAGE" >&2
+	exit $STATE_UNKNOWN
+fi
+
+if ! zfs list "$OPT_DATASET" > /dev/null 2>&1; then
+	echo "'$OPT_DATASET' is no ZFS dataset!" >&2
+	echo "$USAGE" >&2
+	exit $STATE_UNKNOWN
+fi
+
+# Non-numeric thresholds would make the tests below fail and report OK.
+case "$OPT_WARNING$OPT_CRITICAL" in
+	*[!0-9]*) echo '-w OPT_WARNING and -c OPT_CRITICAL must be integers'; exit $STATE_UNKNOWN ;;
+esac
+
+if [ "$OPT_WARNING" -gt "$OPT_CRITICAL" ]; then
+	echo '-w OPT_WARNING must be smaller than -c OPT_CRITICAL'
+	echo "$USAGE" >&2
+	exit $STATE_UNKNOWN
+fi
+
+NOW=$(_now_to_timestamp)
+CREATION_DATE=$(_get_last_snapshot "$OPT_DATASET")
+# Without any snapshot _get_last_snapshot prints 0: say so explicitly.
+if [ -z "$CREATION_DATE" ] || [ "$CREATION_DATE" -eq 0 ]; then
+	echo "CRITICAL: No snapshot found for dataset '$OPT_DATASET'"
+	exit $STATE_CRITICAL
+fi
+
+DIFF=$((NOW - CREATION_DATE))
+
+if [ "$DIFF" -gt "$OPT_CRITICAL" ]; then
+	RETURN=$STATE_CRITICAL
+	MESSAGE="CRITICAL:"
+elif [ "$DIFF" -gt "$OPT_WARNING" ]; then
+	RETURN=$STATE_WARNING
+	MESSAGE="WARNING:"
+else
+	RETURN=$STATE_OK
+	MESSAGE="OK:"
+fi
+
+DATE="$(_timestamp_to_datetime "$CREATION_DATE")"
+echo "$MESSAGE Last snapshot for dataset '$OPT_DATASET' was created on $DATE $(_performance_data)"
+exit $RETURN
-- 
2.41.0