]> git.immae.eu Git - perso/Immae/Config/Nix.git/commitdiff
Add snapshot date check for monitoring
authorIsmaël Bouya <ismael.bouya@normalesup.org>
Sun, 2 May 2021 13:35:50 +0000 (15:35 +0200)
committerIsmaël Bouya <ismael.bouya@normalesup.org>
Sun, 2 May 2021 13:35:50 +0000 (15:35 +0200)
modules/private/monitoring/myplugins.nix
modules/private/monitoring/objects_dilion.nix
modules/private/monitoring/plugins/check_zfs_snapshot [new file with mode: 0755]

index 86b5f1ec29449e5d1d5271e1dfe00368a77735b4..e59ddc402f1793365feb14e504f3b8c804f4d6f0 100644 (file)
@@ -365,6 +365,7 @@ in
   zfs = {
     commands = {
       check_zfs = "$USER2$/check_zpool.sh -p ALL -w 80 -c 90";
+      check_zfs_snapshot = "$USER2$/check_zfs_snapshot -d $ARG1$ -c 18000 -w 14400";
     };
     chunk = let
       zfsPlugin = pkgs.fetchurl {
@@ -378,6 +379,11 @@ in
       wrapProgram $out/check_zpool.sh --prefix PATH : ${lib.makeBinPath [
         pkgs.which pkgs.zfs pkgs.gawk
       ]}
+      cp ${./plugins}/check_zfs_snapshot $out
+      patchShebangs $out/check_zfs_snapshot
+      wrapProgram $out/check_zfs_snapshot --prefix PATH : ${lib.makeBinPath [
+        pkgs.zfs pkgs.coreutils pkgs.gawk pkgs.gnugrep
+      ]}
     '';
   };
 }
index 1baaf398f5dff54c59bc951f79ee1c73826d719d..16b3c646c454262486f408c6bb19799165695a3a 100644 (file)
@@ -9,6 +9,12 @@ let
     servicegroups = "webstatus-resources";
     host_name = hostFQDN;
   };
+  zfs_snapshot = name: {
+    passiveInfo = defaultPassiveInfo // { servicegroups = "webstatus-resources"; };
+    service_description = "ZFS snapshot ${name} happened not too long ago";
+    use = "local-service";
+    check_command = ["check_zfs_snapshot" name];
+  };
 in
 {
   activatedPlugins = [ "zfs" ];
@@ -19,5 +25,8 @@ in
       use = "local-service";
       check_command = ["check_zfs"];
     }
+    (zfs_snapshot "zpool/backup/eldiron/zpool/root")
+    (zfs_snapshot "zpool/backup/eldiron/zpool/root/etc")
+    (zfs_snapshot "zpool/backup/eldiron/zpool/root/var")
   ];
 }
diff --git a/modules/private/monitoring/plugins/check_zfs_snapshot b/modules/private/monitoring/plugins/check_zfs_snapshot
new file mode 100755 (executable)
index 0000000..56f8c4f
--- /dev/null
@@ -0,0 +1,325 @@
+#! /bin/sh
+
+OS=$(uname)
+
+# MIT License
+#
+# Copyright (c) 2016 Josef Friedrich <josef@friedrich.rocks>
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+########################################################################
+# Date functions
+########################################################################
+
+# This date function must be placed on the top of this file because
+# they are used in some global variables.
+
+# to_year ###
+
+##
+# Get the four digit year integer from now.
+#
+# Return:
+#   The current 4 digit year.
+##
+_now_to_year() {
+       date +%Y
+}
+
+##
+# Convert a date in the format YYYY-MM-DD to a four digit year integer.
+#
+# Parameters:
+#   a date in the format YYYY-MM-DD
+#
+# Return:
+#   four digit year integer
+##
+_date_to_year() {
+       local OPTIONS
+       if [ "$OS" = 'Linux' ]; then
+               OPTIONS="--date $1"
+       # FreeBSD, Darwin
+       else
+               OPTIONS="-j -f %Y-%m-%d $1"
+       fi
+       date $OPTIONS +%Y
+}
+
+# to_datetime ###
+
+##
+# Convert a UNIX timestamp to a datetime string.
+#
+# Parameters:
+#   UNIX timestamp
+#
+# Return:
+#   %Y-%m-%d.%H:%M:%S
+##
+_timestamp_to_datetime() {
+       local OPTIONS
+       if [ "$OS" = 'Linux' ]; then
+               OPTIONS="--date @$1"
+       # FreeBSD, Darwin
+       else
+               OPTIONS="-j -f %s $1"
+       fi
+       date $OPTIONS +%Y-%m-%d.%H:%M:%S
+}
+
+# to_timestamp ###
+
+##
+# Get the current UNIX timestamp.
+#
+# Return:
+#   %current UNIX timestamp
+##
+_now_to_timestamp() {
+       date +%s
+}
+
+PROJECT_PAGES='https://github.com/Josef-Friedrich/check_zfs_snapshot
+https://exchange.icinga.com/joseffriedrich/check_zfs_snapshot
+https://exchange.nagios.org/directory/Plugins/System-Metrics/File-System/check_zfs_snapshot/details'
+
+VERSION=1.2
+FIRST_RELEASE=2016-09-08
+SHORT_DESCRIPTION="Monitoring plugin to check how long ago the last \
+snapshot of a ZFS dataset was created."
+USAGE="check_zfs_snapshot v$VERSION
+Copyright (c) $(_date_to_year $FIRST_RELEASE)-$(_now_to_year) \
+Josef Friedrich <josef@friedrich.rocks>
+
+$SHORT_DESCRIPTION
+
+
+Usage: check_zfs_snapshot <options>
+
+Options:
+ -c, --critical=OPT_CRITICAL
+    Interval in seconds for critical state.
+ -d, --dataset=OPT_DATASET
+    The ZFS dataset to check.
+ -h, --help
+    Show this help.
+ -s, --short-description
+    Show a short description of the command.
+ -v, --version
+    Show the version number.
+ -w, --warning=OPT_WARNING
+    Interval in seconds for warning state. Must be lower than -c
+
+Performance data:
+ - last_ago
+    Time interval in seconds for last snapshot.
+ - warning
+    Interval in seconds.
+ - critical
+    Interval in seconds.
+ - snapshot_count
+    How many snapshot exists in the given dataset and all child
+    datasets exists.
+"
+
+# Exit codes
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+_get_last_snapshot() {
+       zfs get creation -Hpr -t snapshot "$1" | \
+               awk 'BEGIN {max = 0} {if ($3>max) max=$3} END {print max}'
+}
+
+_getopts() {
+       while getopts ':c:d:hsvw:-:' OPT ; do
+               case $OPT in
+
+                       c)
+                               OPT_CRITICAL=$OPTARG
+                               ;;
+
+                       d)
+                               OPT_DATASET="$OPTARG"
+                               ;;
+
+                       h)
+                               echo "$USAGE"
+                               exit 0
+                               ;;
+
+                       s)
+                               echo "$SHORT_DESCRIPTION"
+                               exit 0
+                               ;;
+
+                       v)
+                               echo "$VERSION"
+                               exit 0
+                               ;;
+
+                       w)
+                               OPT_WARNING=$OPTARG
+                               ;;
+
+                       \?)
+                               echo "Invalid option “-$OPTARG”!" >&2
+                               exit 2
+                               ;;
+
+                       :)
+                               echo "Option “-$OPTARG” requires an argument!" >&2
+                               exit 3
+                               ;;
+
+                       -)
+                               LONG_OPTARG="${OPTARG#*=}"
+
+                               case $OPTARG in
+
+                                       critical=?*)
+                                               OPT_CRITICAL=$LONG_OPTARG
+                                               ;;
+
+                                       dataset=?*)
+                                               OPT_DATASET="$LONG_OPTARG"
+                                               ;;
+
+                                       help)
+                                               echo "$USAGE"
+                                               exit 0
+                                               ;;
+
+                                       short-description)
+                                               echo "$SHORT_DESCRIPTION"
+                                               exit 0
+                                               ;;
+
+                                       version)
+                                               echo "$VERSION"
+                                               exit 0
+                                               ;;
+
+                                       warning=?*)
+                                               OPT_WARNING=$LONG_OPTARG
+                                               ;;
+
+                                       critical*|dataset*|warning*)
+                                               echo "Option “--$OPTARG” requires an argument!" >&2
+                                               exit 3
+                                               ;;
+
+                                       help*|short-description*|version*)
+                                               echo "No argument allowed for the option “--$OPTARG”!" >&2
+                                               exit 4
+                                               ;;
+
+                                       '')     # "--" terminates argument processing
+                                               break
+                                               ;;
+
+                                       *)
+                                               echo "Invalid option “--$OPTARG”!" >&2
+                                               exit 2
+                                               ;;
+
+                               esac
+                               ;;
+
+               esac
+       done
+}
+
+_snapshot_count() {
+       # FreeBSD wc adds some whitespaces before the number!
+       # cat $HOME/debug | wc -l
+       #        7
+       local COUNT
+       COUNT="$(zfs list -t snapshot | grep "$1" | wc -l)"
+       echo $COUNT
+}
+
+_performance_data() {
+       echo "| \
+last_ago=${DIFF}s;$OPT_WARNING;$OPT_CRITICAL;0 \
+count=$(_snapshot_count "$OPT_DATASET");;;0\
+"
+}
+
+## This SEPARATOR is required for test purposes. Please don’t remove! ##
+
+_getopts $@
+
+if [ -z "$OPT_WARNING" ]; then
+       # 1 day
+       OPT_WARNING=86400
+fi
+
+if [ -z "$OPT_CRITICAL" ]; then
+       # 3 day
+       OPT_CRITICAL=259200
+fi
+
+if [ -z "$OPT_DATASET" ]; then
+       echo "Dataset has to be set! Use option -d <dataset>" >&2
+       echo "$USAGE" >&2
+       exit $STATE_UNKNOWN
+fi
+
+if ! zfs list "$OPT_DATASET" > /dev/null 2>&1; then
+       echo "'$OPT_DATASET' is no ZFS dataset!" >&2
+       echo "$USAGE" >&2
+       exit $STATE_UNKNOWN
+fi
+
+NOW=$(_now_to_timestamp)
+
+CREATION_DATE=$(_get_last_snapshot "$OPT_DATASET")
+
+DIFF=$((NOW - CREATION_DATE))
+
+if [ "$OPT_WARNING" -gt "$OPT_CRITICAL" ]; then
+       echo '-w OPT_WARNING must be smaller than -c OPT_CRITICAL'
+       _usage >&2
+       exit $STATE_UNKNOWN
+fi
+
+RETURN=STATE_UNKNOWN
+
+if [ "$DIFF" -gt "$OPT_CRITICAL" ]; then
+       RETURN=$STATE_CRITICAL
+       MESSAGE="CRITICAL:"
+elif [ "$DIFF" -gt "$OPT_WARNING" ]; then
+       RETURN=$STATE_WARNING
+       MESSAGE="WARNING:"
+else
+       RETURN=$STATE_OK
+       MESSAGE="OK:"
+fi
+
+DATE="$(_timestamp_to_datetime "$CREATION_DATE")"
+
+echo "$MESSAGE Last snapshot for dataset '$OPT_DATASET' was created on $DATE $(_performance_data)"
+
+exit $RETURN