diff options
author | Ismaël Bouya <ismael.bouya@normalesup.org> | 2021-05-02 15:35:50 +0200 |
---|---|---|
committer | Ismaël Bouya <ismael.bouya@normalesup.org> | 2021-05-02 15:35:50 +0200 |
commit | f46b2c61a7a6c7c494f801002ddcf73fcc53fee4 (patch) | |
tree | a2dafdd6af4d670e0f48eb2f50524dc7efd777e2 /modules/private | |
parent | e64a496820b90607cba3762db3ce77847aaac22d (diff) | |
download | Nix-f46b2c61a7a6c7c494f801002ddcf73fcc53fee4.tar.gz Nix-f46b2c61a7a6c7c494f801002ddcf73fcc53fee4.tar.zst Nix-f46b2c61a7a6c7c494f801002ddcf73fcc53fee4.zip |
Add snapshot date check for monitoring
Diffstat (limited to 'modules/private')
-rw-r--r-- | modules/private/monitoring/myplugins.nix | 6 | ||||
-rw-r--r-- | modules/private/monitoring/objects_dilion.nix | 9 | ||||
-rwxr-xr-x | modules/private/monitoring/plugins/check_zfs_snapshot | 325 |
3 files changed, 340 insertions, 0 deletions
diff --git a/modules/private/monitoring/myplugins.nix b/modules/private/monitoring/myplugins.nix index 86b5f1e..e59ddc4 100644 --- a/modules/private/monitoring/myplugins.nix +++ b/modules/private/monitoring/myplugins.nix | |||
@@ -365,6 +365,7 @@ in | |||
365 | zfs = { | 365 | zfs = { |
366 | commands = { | 366 | commands = { |
367 | check_zfs = "$USER2$/check_zpool.sh -p ALL -w 80 -c 90"; | 367 | check_zfs = "$USER2$/check_zpool.sh -p ALL -w 80 -c 90"; |
368 | check_zfs_snapshot = "$USER2$/check_zfs_snapshot -d $ARG1$ -c 18000 -w 14400"; | ||
368 | }; | 369 | }; |
369 | chunk = let | 370 | chunk = let |
370 | zfsPlugin = pkgs.fetchurl { | 371 | zfsPlugin = pkgs.fetchurl { |
@@ -378,6 +379,11 @@ in | |||
378 | wrapProgram $out/check_zpool.sh --prefix PATH : ${lib.makeBinPath [ | 379 | wrapProgram $out/check_zpool.sh --prefix PATH : ${lib.makeBinPath [ |
379 | pkgs.which pkgs.zfs pkgs.gawk | 380 | pkgs.which pkgs.zfs pkgs.gawk |
380 | ]} | 381 | ]} |
382 | cp ${./plugins}/check_zfs_snapshot $out | ||
383 | patchShebangs $out/check_zfs_snapshot | ||
384 | wrapProgram $out/check_zfs_snapshot --prefix PATH : ${lib.makeBinPath [ | ||
385 | pkgs.zfs pkgs.coreutils pkgs.gawk pkgs.gnugrep | ||
386 | ]} | ||
381 | ''; | 387 | ''; |
382 | }; | 388 | }; |
383 | } | 389 | } |
diff --git a/modules/private/monitoring/objects_dilion.nix b/modules/private/monitoring/objects_dilion.nix index 1baaf39..16b3c64 100644 --- a/modules/private/monitoring/objects_dilion.nix +++ b/modules/private/monitoring/objects_dilion.nix | |||
@@ -9,6 +9,12 @@ let | |||
9 | servicegroups = "webstatus-resources"; | 9 | servicegroups = "webstatus-resources"; |
10 | host_name = hostFQDN; | 10 | host_name = hostFQDN; |
11 | }; | 11 | }; |
12 | zfs_snapshot = name: { | ||
13 | passiveInfo = defaultPassiveInfo // { servicegroups = "webstatus-resources"; }; | ||
14 | service_description = "ZFS snapshot ${name} happened not too long ago"; | ||
15 | use = "local-service"; | ||
16 | check_command = ["check_zfs_snapshot" name]; | ||
17 | }; | ||
12 | in | 18 | in |
13 | { | 19 | { |
14 | activatedPlugins = [ "zfs" ]; | 20 | activatedPlugins = [ "zfs" ]; |
@@ -19,5 +25,8 @@ in | |||
19 | use = "local-service"; | 25 | use = "local-service"; |
20 | check_command = ["check_zfs"]; | 26 | check_command = ["check_zfs"]; |
21 | } | 27 | } |
28 | (zfs_snapshot "zpool/backup/eldiron/zpool/root") | ||
29 | (zfs_snapshot "zpool/backup/eldiron/zpool/root/etc") | ||
30 | (zfs_snapshot "zpool/backup/eldiron/zpool/root/var") | ||
22 | ]; | 31 | ]; |
23 | } | 32 | } |
diff --git a/modules/private/monitoring/plugins/check_zfs_snapshot b/modules/private/monitoring/plugins/check_zfs_snapshot new file mode 100755 index 0000000..56f8c4f --- /dev/null +++ b/modules/private/monitoring/plugins/check_zfs_snapshot | |||
@@ -0,0 +1,325 @@ | |||
1 | #! /bin/sh | ||
2 | |||
3 | OS=$(uname) | ||
4 | |||
5 | # MIT License | ||
6 | # | ||
7 | # Copyright (c) 2016 Josef Friedrich <josef@friedrich.rocks> | ||
8 | # | ||
9 | # Permission is hereby granted, free of charge, to any person obtaining | ||
10 | # a copy of this software and associated documentation files (the | ||
11 | # "Software"), to deal in the Software without restriction, including | ||
12 | # without limitation the rights to use, copy, modify, merge, publish, | ||
13 | # distribute, sublicense, and/or sell copies of the Software, and to | ||
14 | # permit persons to whom the Software is furnished to do so, subject to | ||
15 | # the following conditions: | ||
16 | # | ||
17 | # The above copyright notice and this permission notice shall be | ||
18 | # included in all copies or substantial portions of the Software. | ||
19 | # | ||
20 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
21 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
22 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||
23 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
24 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
25 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
26 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
27 | |||
28 | ######################################################################## | ||
29 | # Date functions | ||
30 | ######################################################################## | ||
31 | |||
32 | # These date functions must be placed at the top of this file because | ||
33 | # they are used in some global variables. | ||
34 | |||
35 | # to_year ### | ||
36 | |||
##
# Print the year of the current date.
#
# Return:
#   The current year as a four digit integer (written to stdout).
##
_now_to_year() {
  date '+%Y'
}
46 | |||
##
# Convert a date in the format YYYY-MM-DD to a four digit year integer.
#
# The date is handed to date(1) as one quoted argument, so it is never
# word-split or glob-expanded by the shell.
#
# Globals read:
#   OS  kernel name as printed by uname; selects the date(1) dialect
#
# Parameters:
#   a date in the format YYYY-MM-DD
#
# Return:
#   four digit year integer (written to stdout)
##
_date_to_year() {
  if [ "$OS" = 'Linux' ]; then
    # GNU date parses the date string itself.
    date --date "$1" +%Y
  else
    # FreeBSD, Darwin: -j stops date from setting the clock,
    # -f gives the input format.
    date -j -f %Y-%m-%d "$1" +%Y
  fi
}
66 | |||
67 | # to_datetime ### | ||
68 | |||
##
# Convert a UNIX timestamp to a datetime string.
#
# The timestamp is handed to date(1) as one quoted argument, so it is
# never word-split or glob-expanded by the shell.
#
# Globals read:
#   OS  kernel name as printed by uname; selects the date(1) dialect
#
# Parameters:
#   UNIX timestamp
#
# Return:
#   %Y-%m-%d.%H:%M:%S (written to stdout, in the local time zone)
##
_timestamp_to_datetime() {
  if [ "$OS" = 'Linux' ]; then
    # GNU date reads "@<seconds>" as seconds since the epoch.
    date --date "@$1" +%Y-%m-%d.%H:%M:%S
  else
    # FreeBSD, Darwin: -j stops date from setting the clock,
    # -f %s parses the input as seconds since the epoch.
    date -j -f %s "$1" +%Y-%m-%d.%H:%M:%S
  fi
}
88 | |||
89 | # to_timestamp ### | ||
90 | |||
##
# Print the current time as a UNIX timestamp.
#
# Return:
#   The current UNIX timestamp in seconds (written to stdout).
##
_now_to_timestamp() {
  date '+%s'
}
100 | |||
# Upstream project pages, one URL per line.
PROJECT_PAGES='https://github.com/Josef-Friedrich/check_zfs_snapshot
https://exchange.icinga.com/joseffriedrich/check_zfs_snapshot
https://exchange.nagios.org/directory/Plugins/System-Metrics/File-System/check_zfs_snapshot/details'

VERSION=1.2
FIRST_RELEASE=2016-09-08
SHORT_DESCRIPTION="Monitoring plugin to check how long ago the last \
snapshot of a ZFS dataset was created."

# Help text printed for -h/--help and after usage errors. The copyright
# range is computed when the script starts, which is why the date
# helpers have to be defined before this assignment.
USAGE="check_zfs_snapshot v$VERSION
Copyright (c) $(_date_to_year "$FIRST_RELEASE")-$(_now_to_year) \
Josef Friedrich <josef@friedrich.rocks>

$SHORT_DESCRIPTION


Usage: check_zfs_snapshot <options>

Options:
-c, --critical=OPT_CRITICAL
Interval in seconds for critical state.
-d, --dataset=OPT_DATASET
The ZFS dataset to check.
-h, --help
Show this help.
-s, --short-description
Show a short description of the command.
-v, --version
Show the version number.
-w, --warning=OPT_WARNING
Interval in seconds for warning state. Must be lower than -c

Performance data:
- last_ago
Time interval in seconds for last snapshot.
- warning
Interval in seconds.
- critical
Interval in seconds.
- count
How many snapshots exist in the given dataset and all child
datasets.
"

# Exit codes understood by the monitoring system.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
149 | |||
##
# Get the creation time of the newest snapshot of a dataset. Snapshots
# of child datasets are included (-r).
#
# "zfs get -H" prints tab separated columns (name, property, value,
# source). The columns are split on tabs only, so a dataset name that
# contains spaces cannot shift the value out of the third column.
#
# Parameters:
#   the ZFS dataset
#
# Return:
#   UNIX timestamp of the newest snapshot, or 0 if there is no snapshot
#   (written to stdout)
##
_get_last_snapshot() {
  zfs get -Hpr -t snapshot creation "$1" |
    awk -F '\t' '
      BEGIN { max = 0 }
      # "+ 0" forces a numeric comparison; the original text of the
      # largest value is kept so it is printed unchanged.
      { if ($3 + 0 > max + 0) max = $3 }
      END { print max }
    '
}
154 | |||
##
# Parse the command line options into global variables.
#
# Short options are handled by getopts directly. Long options are
# emulated with the "-" pseudo option: for "--critical=10" getopts
# reports the option "-" with the argument "critical=10".
#
# Parameters:
#   the command line arguments of the script
#
# Globals set:
#   OPT_CRITICAL, OPT_DATASET, OPT_WARNING
#
# Exit:
#   0 after printing the help, the short description or the version
#   3 on any usage error. 3 is the UNKNOWN state of the monitoring
#     plugin API; other codes would be read as CRITICAL (2) or fall
#     outside the valid range 0-3 (4).
##
_getopts() {
  # Always start at the first argument, also on a repeated call.
  OPTIND=1

  while getopts ':c:d:hsvw:-:' OPT ; do
    case $OPT in

      c) OPT_CRITICAL=$OPTARG ;;

      d) OPT_DATASET="$OPTARG" ;;

      h)
        echo "$USAGE"
        exit 0
        ;;

      s)
        echo "$SHORT_DESCRIPTION"
        exit 0
        ;;

      v)
        echo "$VERSION"
        exit 0
        ;;

      w) OPT_WARNING=$OPTARG ;;

      \?)
        echo "Invalid option “-$OPTARG”!" >&2
        exit 3
        ;;

      :)
        echo "Option “-$OPTARG” requires an argument!" >&2
        exit 3
        ;;

      -)
        # Text after the first "=" of the long option.
        LONG_OPTARG="${OPTARG#*=}"

        case $OPTARG in

          critical=?*) OPT_CRITICAL=$LONG_OPTARG ;;

          dataset=?*) OPT_DATASET="$LONG_OPTARG" ;;

          help)
            echo "$USAGE"
            exit 0
            ;;

          short-description)
            echo "$SHORT_DESCRIPTION"
            exit 0
            ;;

          version)
            echo "$VERSION"
            exit 0
            ;;

          warning=?*) OPT_WARNING=$LONG_OPTARG ;;

          # Value-taking long option given without "=value".
          critical*|dataset*|warning*)
            echo "Option “--$OPTARG” requires an argument!" >&2
            exit 3
            ;;

          # Flag-only long option given with trailing text.
          help*|short-description*|version*)
            echo "No argument allowed for the option “--$OPTARG”!" >&2
            exit 3
            ;;

          '') # "--" terminates argument processing
            break
            ;;

          *)
            echo "Invalid option “--$OPTARG”!" >&2
            exit 3
            ;;

        esac
        ;;

    esac
  done
}
253 | |||
##
# Count the snapshots of a dataset and of all its child datasets.
#
# zfs itself restricts the listing to the dataset (-r). The name is
# therefore never used as a search pattern, and datasets that merely
# contain the same text (tank/data vs. tank/database) are not counted.
#
# Parameters:
#   the ZFS dataset
#
# Return:
#   number of snapshots as a plain integer (written to stdout)
##
_snapshot_count() {
  # awk prints the record count without padding; FreeBSD wc adds
  # leading whitespace to its number.
  zfs list -H -o name -t snapshot -r "$1" | awk 'END { print NR }'
}
262 | |||
##
# Print the performance data part of the plugin output.
#
# Globals read:
#   DIFF, OPT_WARNING, OPT_CRITICAL, OPT_DATASET
#
# Return:
#   "| last_ago=...s;<warning>;<critical>;0 count=<count>;;;0" on stdout
##
_performance_data() {
  # Keep the snapshot count in the function's own positional parameters.
  set -- "$(_snapshot_count "$OPT_DATASET")"
  echo "| last_ago=${DIFF}s;$OPT_WARNING;$OPT_CRITICAL;0 count=$1;;;0"
}
269 | |||
270 | ## This SEPARATOR is required for test purposes. Please don’t remove! ## | ||
271 | |||
# Main program: parse the options, validate them, find the age of the
# newest snapshot and report it in monitoring plugin format.

# Quoted, so that arguments containing whitespace stay intact.
_getopts "$@"

if [ -z "$OPT_WARNING" ]; then
  # 1 day
  OPT_WARNING=86400
fi

if [ -z "$OPT_CRITICAL" ]; then
  # 3 days
  OPT_CRITICAL=259200
fi

if [ -z "$OPT_DATASET" ]; then
  echo "Dataset has to be set! Use option -d <dataset>" >&2
  echo "$USAGE" >&2
  exit $STATE_UNKNOWN
fi

# The thresholds are compared with "-gt" below, which only works on
# integers. Reject anything else before any work is done.
case "$OPT_WARNING$OPT_CRITICAL" in
  *[!0-9]*)
    echo '-w OPT_WARNING and -c OPT_CRITICAL must be intervals in seconds'
    echo "$USAGE" >&2
    exit $STATE_UNKNOWN
    ;;
esac

if [ "$OPT_WARNING" -gt "$OPT_CRITICAL" ]; then
  echo '-w OPT_WARNING must be smaller than -c OPT_CRITICAL'
  echo "$USAGE" >&2
  exit $STATE_UNKNOWN
fi

if ! zfs list "$OPT_DATASET" > /dev/null 2>&1; then
  echo "'$OPT_DATASET' is no ZFS dataset!" >&2
  echo "$USAGE" >&2
  exit $STATE_UNKNOWN
fi

NOW=$(_now_to_timestamp)

CREATION_DATE=$(_get_last_snapshot "$OPT_DATASET")
# 0 is what _get_last_snapshot prints when no snapshot exists.
CREATION_DATE=${CREATION_DATE:-0}

if [ "$CREATION_DATE" -eq 0 ]; then
  # Still CRITICAL, but without a misleading creation date of 1970.
  echo "CRITICAL: No snapshot found for dataset '$OPT_DATASET'"
  exit $STATE_CRITICAL
fi

# Age of the newest snapshot in seconds.
DIFF=$((NOW - CREATION_DATE))

RETURN=$STATE_UNKNOWN

if [ "$DIFF" -gt "$OPT_CRITICAL" ]; then
  RETURN=$STATE_CRITICAL
  MESSAGE="CRITICAL:"
elif [ "$DIFF" -gt "$OPT_WARNING" ]; then
  RETURN=$STATE_WARNING
  MESSAGE="WARNING:"
else
  RETURN=$STATE_OK
  MESSAGE="OK:"
fi

DATE="$(_timestamp_to_datetime "$CREATION_DATE")"

echo "$MESSAGE Last snapshot for dataset '$OPT_DATASET' was created on $DATE $(_performance_data)"

exit $RETURN