From 171d8e1a8861e5844f6cb8d1623b93b0e86aabea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Isma=C3=ABl=20Bouya?=
Date: Thu, 18 Jun 2020 06:47:08 +0200
Subject: Add monitoring for duply backup

---
Review notes (not part of the commit message):
- Line breaks and indentation were rebuilt from a whitespace-collapsed
  rendering of this patch. Indentation of context lines is a best guess:
  use `git apply --ignore-whitespace` if it does not match the tree.
- plugins/check_eriomem_age was hardened so that every failure it cannot
  interpret ends as UNKNOWN (exit 3) with a message instead of a silent
  abort under `set -e`. The blob id on its "index" line still refers to
  the original 66-line version.

 modules/duply_backup/default.nix                   |   1 +
 modules/private/monitoring/default.nix             |  21 ++++++-
 modules/private/monitoring/objects_common.nix      |   1 +
 .../private/monitoring/objects_monitoring-1.nix    |  24 +++++++-
 .../private/monitoring/plugins/check_eriomem_age   | 101 +++++++++++++++++++++
 modules/private/monitoring/to_objects.nix          |  10 ++++
 modules/private/system/monitoring-1.nix            |   7 +++
 7 files changed, 160 insertions(+), 5 deletions(-)
 create mode 100755 modules/private/monitoring/plugins/check_eriomem_age

(limited to 'modules')

diff --git a/modules/duply_backup/default.nix b/modules/duply_backup/default.nix
index 1e115be..bce4d65 100644
--- a/modules/duply_backup/default.nix
+++ b/modules/duply_backup/default.nix
@@ -82,6 +82,7 @@ in
               ''
                 touch ${varDir}/${k}.log
                 ${pkgs.duply}/bin/duply ${config.secrets.location}/backup/${k}/ ${action} --force >> ${varDir}/${k}.log
+                [[ $? = 0 ]] || echo -e "Error when doing backup for ${k}, see above\n---------------------------------------" >&2
               ''
             ) config.services.duplyBackup.profiles)}
           '';
diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix
index 8ae0b30..316c2dd 100644
--- a/modules/private/monitoring/default.nix
+++ b/modules/private/monitoring/default.nix
@@ -58,6 +58,9 @@ let
       wrapProgram $out/check_eriomem --prefix PATH : ${lib.makeBinPath [
         pkgs.s3cmd pkgs.python3
       ]}
+      wrapProgram $out/check_eriomem_age --prefix PATH : ${lib.makeBinPath [
+        pkgs.duplicity
+      ]} --set SECRETS_PATH ${lib.optionalString cfg.master config.secrets.fullPaths."eriomem_access_key"}
       wrapProgram $out/notify_by_email --prefix PATH : ${lib.makeBinPath [
         pkgs.mailutils
       ]}
@@ -136,7 +139,7 @@ let
     lib.attrsets.optionalAttrs
       (builtins.pathExists specific_file)
       (pkgs.callPackage specific_file {
-        inherit config emailCheck;
+        inherit config nodes emailCheck;
         hostFQDN = config.hostEnv.fqdn;
         hostName = name;
       });
@@ -232,10 +235,22 @@ in
         dest = "naemon/id_rsa";
         user = "naemon";
         group = "naemon";
-        premissions = "0400";
+        permissions = "0400";
         text = config.myEnv.monitoring.ssh_secret_key;
       }
-    ];
+    ] ++ lib.optional cfg.master (
+      {
+        dest = "eriomem_access_key";
+        user = "naemon";
+        group = "naemon";
+        permissions = "0400";
+        text = ''
+          export AWS_ACCESS_KEY_ID="${config.myEnv.backup.accessKeyId}"
+          export AWS_SECRET_ACCESS_KEY="${config.myEnv.backup.secretAccessKey}"
+          export BASE_URL="${config.myEnv.backup.remote}"
+        '';
+      }
+    );
     # needed since extraResource is not in the closure
     systemd.services.naemon.path = [ myplugins ];
     services.naemon = {
diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix
index 2585c38..c0a17e6 100644
--- a/modules/private/monitoring/objects_common.nix
+++ b/modules/private/monitoring/objects_common.nix
@@ -94,6 +94,7 @@ in
     check_emails = "$USER2$/check_emails -H $HOSTADDRESS$ -i $USER203$ -l $ARG1$ -p $ARG2$ -s $ARG3$ -f $ARG4$";
     check_emails_local = "$USER2$/check_emails -H $HOSTADDRESS$ -n $ARG1$ -r $ADMINEMAIL$ -s $ARG2$ -f $ARG3$";
     check_eriomem = "$USER2$/check_eriomem $USER208$";
+    check_eriomem_age = "$USER2$/check_eriomem_age $ARG1$";
     check_external_dns = "$USER1$/check_dns -H $ARG2$ -s $ARG1$ $ARG3$";
     check_ftp_database = "$USER2$/check_ftp_database";
     check_git = "$USER2$/check_git $USER203$";
diff --git a/modules/private/monitoring/objects_monitoring-1.nix b/modules/private/monitoring/objects_monitoring-1.nix
index ec6fdce..02870ed 100644
--- a/modules/private/monitoring/objects_monitoring-1.nix
+++ b/modules/private/monitoring/objects_monitoring-1.nix
@@ -1,4 +1,23 @@
-{ config, pkgs, hostFQDN, emailCheck, ... }:
+{ config, pkgs, nodes, hostFQDN, emailCheck, ... }:
+let
+  to_eriomem_age_dependency = name: {
+    dependent_host_name = "eldiron.immae.eu";
+    host_name = "eldiron.immae.eu";
+    dependent_service_description = "Eriomem backup for ${name} is not too old";
+    service_description = "Eriomem backup is up and not full";
+    execution_failure_criteria = "u";
+    notification_failure_criteria = "u";
+  };
+  to_eriomem_age = name: {
+    service_description = "Eriomem backup for ${name} is not too old";
+    host_name = "eldiron.immae.eu";
+    use = "external-service";
+    check_command = ["check_eriomem_age" name];
+
+    check_interval = "120";
+    notification_interval = "120";
+  };
+in
 {
   host = {
     # Dummy host for testing
@@ -654,7 +673,7 @@
       _webstatus_name = "LDAP";
       _webstatus_url = "ldap.immae.eu";
     }
-  ];
+  ] ++ map to_eriomem_age (builtins.attrNames nodes.eldiron.config.services.duplyBackup.profiles);
   contact = {
     telio-tortay = config.myEnv.monitoring.contacts.telio-tortay // {
       use = "generic-contact";
@@ -664,4 +683,5 @@
   contactgroup = {
     telio-tortay = { alias = "Telio Tortay"; members = "immae"; };
   };
+  servicedependency = map to_eriomem_age_dependency (builtins.attrNames nodes.eldiron.config.services.duplyBackup.profiles);
 }
diff --git a/modules/private/monitoring/plugins/check_eriomem_age b/modules/private/monitoring/plugins/check_eriomem_age
new file mode 100755
index 0000000..4d03b82
--- /dev/null
+++ b/modules/private/monitoring/plugins/check_eriomem_age
@@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+#
+# Naemon plugin: report the age and consistency of the duplicity backup
+# chain stored at "$BASE_URL$folder".
+#
+# Usage: check_eriomem_age FOLDER
+# Environment: SECRETS_PATH, a file that is sourced and must define
+#   BASE_URL plus the credentials duplicity needs.
+# Exit status: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.
+
+set -euo pipefail
+
+folder=${1:-}
+
+# Any failure that prevents a verdict must be reported as UNKNOWN with a
+# message. Letting "set -e" abort instead would exit silently with the
+# failing command's status (e.g. 1 for a grep without match), which the
+# monitoring daemon would read as WARNING.
+unknown() {
+  echo "duply-backup $folder UNKNOWN - $1"
+  exit 3
+}
+
+[[ -n "$folder" ]] || unknown "missing folder argument"
+[[ -r "${SECRETS_PATH:-}" ]] || unknown "secrets file is not readable"
+
+source "$SECRETS_PATH"
+[[ -n "${BASE_URL:-}" ]] || unknown "BASE_URL is not defined"
+
+# Throw-away HOME, removed on exit (presumably to keep whatever duplicity
+# writes below $HOME out of the calling user's home - to be confirmed).
+HOME=$(mktemp -d) || unknown "impossible to create a temporary home"
+export HOME
+trap 'rm -rf "$HOME"' EXIT
+
+# Rewrite a timestamp of the form YYYYMMDDThhmmss... as
+# YYYY-MM-DDThh:mm:ss... before it is handed to "date -d". An empty or
+# missing argument yields an empty result.
+parse_date() {
+  echo "${1:-}" | sed -e "s/^\(....\)\(..\)\(..\)T\(..\)\(..\)\(..\)/\1-\2-\3T\4:\5:\6/"
+}
+
+# --log-fd 2 makes duplicity write its machine-readable log to stderr;
+# only that stream is captured, stdout is discarded.
+output=$(duplicity collection-status --log-fd 2 "$BASE_URL$folder" 2>&1 > /dev/null) \
+  || unknown "duplicity collection-status failed"
+
+# "|| true": under pipefail a grep without match must yield an empty
+# value (handled below), not abort the script.
+output=$(echo "$output" | { grep -v "^\." || true; })
+
+last_full=$(parse_date "$(echo "$output" | { grep "^ full " || true; } | cut -d' ' -f3 | sort | tail -n1)")
+last_bkp=$(parse_date "$(echo "$output" | { grep -E "^ (full|inc) " || true; } | cut -d' ' -f3 | sort | tail -n1)")
+orphaned_sets=$(echo "$output" | { grep "^orphaned-sets-num" || true; } | cut -d' ' -f2)
+incomplete_sets=$(echo "$output" | { grep "^incomplete-sets-num" || true; } | cut -d' ' -f2)
+
+if [[ -z "$last_full" || -z "$last_bkp" || ! "$orphaned_sets" =~ ^[0-9]+$ || ! "$incomplete_sets" =~ ^[0-9]+$ ]]; then
+  unknown "impossible to parse result"
+fi
+
+now=$(date "+%s")
+last_full_ts=$(date -d "$last_full" "+%s" 2> /dev/null) || unknown "impossible to parse result"
+last_bkp_ts=$(date -d "$last_bkp" "+%s" 2> /dev/null) || unknown "impossible to parse result"
+last_full_age=$(( (now - last_full_ts) / (60*60*24) ))
+last_bkp_age=$(( (now - last_bkp_ts) / (60*60) ))
+
+PERFS="orphan=$orphaned_sets;1;;0; incomplete=$incomplete_sets;1;;0; age=${last_bkp_age}h;30;48;0; full_age=${last_full_age}d;35;45;0;"
+
+WARNINGS=""
+ERRORS=""
+if [[ "$incomplete_sets" -gt 0 ]]; then
+  WARNINGS="$WARNINGS - Incomplete sets is $incomplete_sets"
+fi
+
+if [[ "$orphaned_sets" -gt 0 ]]; then
+  WARNINGS="$WARNINGS - Orphaned sets is $orphaned_sets"
+fi
+
+# Last full backup: warning above 35 days, critical above 45.
+if [[ "$last_full_age" -gt 45 ]]; then
+  ERRORS="$ERRORS - Last full backup is too old $last_full"
+elif [[ "$last_full_age" -gt 35 ]]; then
+  WARNINGS="$WARNINGS - Last full backup is getting old $last_full"
+fi
+
+# Last backup, full or incremental: warning above 30 hours, critical above 48.
+if [[ "$last_bkp_age" -gt 48 ]]; then
+  ERRORS="$ERRORS - Last backup is too old $last_bkp"
+elif [[ "$last_bkp_age" -gt 30 ]]; then
+  WARNINGS="$WARNINGS - Last backup is getting old $last_bkp"
+fi
+
+if [[ -n "$ERRORS" ]]; then
+  echo "duply-backup $folder CRITICAL$ERRORS$WARNINGS | $PERFS"
+  exit 2
+elif [[ -n "$WARNINGS" ]]; then
+  echo "duply-backup $folder WARNING$WARNINGS | $PERFS"
+  exit 1
+else
+  echo "duply-backup $folder OK | $PERFS"
+fi
diff --git a/modules/private/monitoring/to_objects.nix b/modules/private/monitoring/to_objects.nix
index 7b4b523..12721d2 100644
--- a/modules/private/monitoring/to_objects.nix
+++ b/modules/private/monitoring/to_objects.nix
@@ -38,7 +38,15 @@ let
       ) v)}
     }
     '';
+  toOtherNoName = keyname: v: ''
+    define ${keyname} {
+    ${builtins.concatStringsSep "\n" (mapAttrsToList (kk: vv:
+      " ${pad 30 kk} ${vv}"
+    ) v)}
+    }
+    '';
   toOthers = keyname: a: builtins.concatStringsSep "\n" (mapAttrsToList (toOther keyname) a);
+  toOthersArray = keyname: a: builtins.concatStringsSep "\n" (map (toOtherNoName keyname) a);
 
   toTemplate = keyname: k: v: ''
     define ${keyname} {
@@ -61,6 +69,8 @@ let
     then toTemplates v
     else if builtins.elem keyname ["hostgroup" "host" "contactgroup" "contact" "timeperiod" "servicegroup"]
     then toOthers keyname v
+    else if builtins.elem keyname ["servicedependency"]
+    then toOthersArray keyname v
     else "";
   toObjects = v: builtins.concatStringsSep "\n" (mapAttrsToList toObjects' v);
 in
diff --git a/modules/private/system/monitoring-1.nix b/modules/private/system/monitoring-1.nix
index 7581c01..c87c784 100644
--- a/modules/private/system/monitoring-1.nix
+++ b/modules/private/system/monitoring-1.nix
@@ -31,6 +31,13 @@
   };
   myServices.mailRelay.enable = true;
 
+  security.pki.certificateFiles = [
+    (pkgs.fetchurl {
+      url = "http://downloads.e.eriomem.net/eriomemca.pem";
+      sha256 = "1ixx4c6j3m26j8dp9a3dkvxc80v1nr5aqgmawwgs06bskasqkvvh";
+    })
+  ];
+
   # This value determines the NixOS release with which your system is
   # to be compatible, in order to avoid breaking some software such as
   # database servers. You should change this only after NixOS release
-- 
cgit v1.2.3