From 1a64deeb894dc95e2645a75771732c6cc53a79ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Bouya?= Date: Wed, 4 Oct 2023 01:35:06 +0200 Subject: Squash changes containing private information There were a lot of changes since the previous commit, but a lot of them contained personal information about users. All those changes got stashed into a single commit (history is kept in a different place) and private information was moved into a separate private repository --- flakes/private/monitoring/flake.lock | 67 ++++ flakes/private/monitoring/flake.nix | 267 ++++++++++++++ flakes/private/monitoring/myplugins.nix | 400 +++++++++++++++++++++ flakes/private/monitoring/nagios-cli.cfg | 68 ++++ flakes/private/monitoring/objects_common.nix | 227 ++++++++++++ flakes/private/monitoring/plugins/check_backup_age | 66 ++++ flakes/private/monitoring/plugins/check_bandwidth | 122 +++++++ flakes/private/monitoring/plugins/check_command | 113 ++++++ flakes/private/monitoring/plugins/check_emails | 121 +++++++ flakes/private/monitoring/plugins/check_eriomem | 83 +++++ .../private/monitoring/plugins/check_ftp_database | 11 + flakes/private/monitoring/plugins/check_git | 81 +++++ .../monitoring/plugins/check_imap_connection | 52 +++ .../monitoring/plugins/check_last_file_date | 28 ++ flakes/private/monitoring/plugins/check_mem.sh | 31 ++ .../monitoring/plugins/check_mysql_replication | 41 +++ .../monitoring/plugins/check_openldap_replication | 54 +++ flakes/private/monitoring/plugins/check_ovh_sms | 25 ++ .../plugins/check_postgres_database_count | 32 ++ .../monitoring/plugins/check_postgres_replication | 35 ++ .../monitoring/plugins/check_redis_replication | 38 ++ .../private/monitoring/plugins/check_zfs_snapshot | 325 +++++++++++++++++ .../private/monitoring/plugins/notify_by_apprise | 31 ++ flakes/private/monitoring/plugins/notify_by_email | 29 ++ flakes/private/monitoring/plugins/notify_by_slack | 46 +++ flakes/private/monitoring/plugins/send_nrdp.sh | 57 +++ 
flakes/private/monitoring/send_mails | 15 + flakes/private/monitoring/to_objects.nix | 77 ++++ 28 files changed, 2542 insertions(+) create mode 100644 flakes/private/monitoring/flake.lock create mode 100644 flakes/private/monitoring/flake.nix create mode 100644 flakes/private/monitoring/myplugins.nix create mode 100644 flakes/private/monitoring/nagios-cli.cfg create mode 100644 flakes/private/monitoring/objects_common.nix create mode 100755 flakes/private/monitoring/plugins/check_backup_age create mode 100755 flakes/private/monitoring/plugins/check_bandwidth create mode 100755 flakes/private/monitoring/plugins/check_command create mode 100755 flakes/private/monitoring/plugins/check_emails create mode 100755 flakes/private/monitoring/plugins/check_eriomem create mode 100755 flakes/private/monitoring/plugins/check_ftp_database create mode 100755 flakes/private/monitoring/plugins/check_git create mode 100755 flakes/private/monitoring/plugins/check_imap_connection create mode 100755 flakes/private/monitoring/plugins/check_last_file_date create mode 100755 flakes/private/monitoring/plugins/check_mem.sh create mode 100755 flakes/private/monitoring/plugins/check_mysql_replication create mode 100755 flakes/private/monitoring/plugins/check_openldap_replication create mode 100755 flakes/private/monitoring/plugins/check_ovh_sms create mode 100755 flakes/private/monitoring/plugins/check_postgres_database_count create mode 100755 flakes/private/monitoring/plugins/check_postgres_replication create mode 100755 flakes/private/monitoring/plugins/check_redis_replication create mode 100755 flakes/private/monitoring/plugins/check_zfs_snapshot create mode 100755 flakes/private/monitoring/plugins/notify_by_apprise create mode 100755 flakes/private/monitoring/plugins/notify_by_email create mode 100755 flakes/private/monitoring/plugins/notify_by_slack create mode 100755 flakes/private/monitoring/plugins/send_nrdp.sh create mode 100755 flakes/private/monitoring/send_mails create mode 
100644 flakes/private/monitoring/to_objects.nix (limited to 'flakes/private/monitoring') diff --git a/flakes/private/monitoring/flake.lock b/flakes/private/monitoring/flake.lock new file mode 100644 index 0000000..2e156fb --- /dev/null +++ b/flakes/private/monitoring/flake.lock @@ -0,0 +1,67 @@ +{ + "nodes": { + "environment": { + "locked": { + "lastModified": 1, + "narHash": "sha256-rMKbM7fHqWQbI7y59BsPG8KwoDj2jyrvN2niPWB24uE=", + "path": "../environment", + "type": "path" + }, + "original": { + "path": "../environment", + "type": "path" + } + }, + "naemon": { + "locked": { + "lastModified": 1, + "narHash": "sha256-6le57WLKj1HXdhe4cgYO6N0Z9nJZC+plQY8HhOwzEIk=", + "path": "../../naemon", + "type": "path" + }, + "original": { + "path": "../../naemon", + "type": "path" + } + }, + "nixpkgs-lib": { + "locked": { + "dir": "lib", + "lastModified": 1691269286, + "narHash": "sha256-7cPTz1bPhwq8smt9rHDcFtJsd1tFDcBukzj5jOXqjfk=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "85d4248a4f5aa6bc55dd2cea8131bb68b2d43804", + "type": "github" + }, + "original": { + "dir": "lib", + "owner": "NixOS", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "environment": "environment", + "naemon": "naemon", + "nixpkgs-lib": "nixpkgs-lib", + "secrets": "secrets" + } + }, + "secrets": { + "locked": { + "lastModified": 1, + "narHash": "sha256-5AakznhrJFmwCD7lr4JEh55MtdAJL6WA/YuBks6ISSE=", + "path": "../../secrets", + "type": "path" + }, + "original": { + "path": "../../secrets", + "type": "path" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flakes/private/monitoring/flake.nix b/flakes/private/monitoring/flake.nix new file mode 100644 index 0000000..b7c3997 --- /dev/null +++ b/flakes/private/monitoring/flake.nix @@ -0,0 +1,267 @@ +{ + inputs = { + environment.url = "path:../environment"; + secrets.url = "path:../../secrets"; + naemon.url = "path:../../naemon"; + nixpkgs-lib.url = "github:NixOS/nixpkgs?dir=lib"; + }; + outputs = { self, 
environment, nixpkgs-lib, secrets, naemon }: { + nagios-cli-config = ./nagios-cli.cfg; + lib = rec { + expandedObject = kind: object: objects: + if object ? "use" + then expandedObject kind objects.templates.${kind}.${object.use} objects // object + else object; + + objectsCommon = import ./objects_common.nix; + toObjects = import ./to_objects.nix { inherit (nixpkgs-lib) lib; }; + + toMasterPassiveObject = svcTemplate: freshnessThresholdMultiplier: objects: + { + service = with nixpkgs-lib.lib; map (s: + { + host_name = (expandedObject "service" s objects).host_name; + use = svcTemplate; + retry_interval = "1"; + freshness_threshold = let + fs = expandedObject "service" s objects; + in if builtins.isInt fs.check_interval + then builtins.ceil (freshnessThresholdMultiplier * 60 * fs.check_interval) + else fs.check_interval; + } + // filterAttrs (k: v: builtins.elem k ["service_description"] || builtins.substring 0 1 k == "_") s + // mapAttrs' + (n: nameValuePair (removePrefix "__passive_" n)) + (filterAttrs (k: _: hasPrefix "__passive_" k) s) + ) objects.service; + host = objects.host; + }; + + emailCheck = allCfg: host: hostFQDN: let + cfg = allCfg."${host}"; + reverseTargets = builtins.attrNames (nixpkgs-lib.lib.filterAttrs (k: v: builtins.elem host v.targets) allCfg); + to_email = cfg': host': + let sep = if nixpkgs-lib.lib.hasInfix "+" cfg'.mail_address then "_" else "+"; + in "${cfg'.mail_address}${sep}${host'}@${cfg'.mail_domain}"; + mails_to_send = builtins.concatStringsSep "," (map (n: to_email allCfg."${n}" host) cfg.targets); + mails_to_receive = builtins.concatStringsSep "," (map (n: "${to_email cfg n}:${n}") reverseTargets); + command = if cfg.local + then + [ "check_emails_local" "/var/lib/naemon/checks/email" mails_to_send mails_to_receive ] + else + [ "check_emails" cfg.login cfg.port mails_to_send mails_to_receive ]; + in + { + service_description = "${hostFQDN} email service is active"; + use = "mail-service"; + host_name = hostFQDN; + servicegroups 
= "webstatus-email"; + check_command = command; + }; + }; + nixosModule = self.nixosModules.monitoring; + nixosModules.monitoring = { config, pkgs, lib, ... }: + let + cfg = config.myServices.monitoring; + allPluginsConfig = import ./myplugins.nix { + inherit pkgs lib config; + sudo = "/run/wrappers/bin/sudo"; + }; + mypluginsConfig = lib.mapAttrs (n: v: + if builtins.isFunction v + then v (cfg.pluginsArgs."${n}" or {}) + else v + ) (lib.getAttrs cfg.activatedPlugins allPluginsConfig); + myplugins = let + mypluginsChunk = builtins.concatStringsSep "\n" (lib.mapAttrsToList (k: v: v.chunk or "") mypluginsConfig); + in pkgs.runCommand "buildplugins" { + buildInputs = [ pkgs.makeWrapper pkgs.perl ]; + } '' + mkdir $out + ${mypluginsChunk} + ''; + objectsModule = with lib.types; submodule { + options = { + command = lib.mkOption { + type = attrsOf str; + default = {}; + description = "Command definitions"; + }; + + host = lib.mkOption { + type = attrsOf (attrsOf str); + default = {}; + description = "Host definitions"; + }; + hostgroup = lib.mkOption { + type = attrsOf (attrsOf str); + default = {}; + description = "Host group definitions"; + }; + hostdependency = lib.mkOption { + type = listOf (attrsOf str); + default = []; + description = "Host dependency definitions"; + }; + + service = lib.mkOption { + type = listOf (attrsOf (oneOf [ str (listOf str) int ])); + # str -> string + # listOf str -> list to be concatenated with "!" 
+ # int -> toString + default = []; + description = "Service definitions"; + }; + servicegroup = lib.mkOption { + type = attrsOf (attrsOf str); + default = {}; + description = "Service group definitions"; + }; + servicedependency = lib.mkOption { + type = listOf (attrsOf str); + default = []; + description = "Service dependency definitions"; + }; + + contact = lib.mkOption { + type = attrsOf (attrsOf str); + default = {}; + description = "Contact definitions"; + }; + contactgroup = lib.mkOption { + type = attrsOf (attrsOf str); + default = {}; + description = "Contact group definitions"; + }; + + timeperiod = lib.mkOption { + type = attrsOf (attrsOf str); + default = {}; + description = "Time period definitions"; + }; + + templates = lib.mkOption { + description = "Template definitions"; + default = {}; + type = submodule { + options = { + service = lib.mkOption { type = attrsOf (attrsOf (either str int)); default = {}; }; + contact = lib.mkOption { type = attrsOf (attrsOf str); default = {}; }; + host = lib.mkOption { type = attrsOf (attrsOf str); default = {}; }; + }; + }; + }; + }; + }; + in + { + options = { + myServices.monitoring = { + enable = lib.mkOption { + type = lib.types.bool; + default = false; + description = '' + Whether to enable monitoring. 
+ ''; + }; + master = lib.mkOption { + type = lib.types.bool; + default = false; + description = '' + This instance is the master instance + ''; + }; + pluginsArgs = lib.mkOption { + default = {}; + description = "Arguments to pass to the naemon plugin configuration"; + type = lib.types.attrsOf (lib.types.attrsOf lib.types.unspecified); + }; + activatedPlugins = lib.mkOption { + default = []; + description = "List of naemon plugins to activate"; + type = lib.types.listOf (lib.types.enum (builtins.attrNames allPluginsConfig)); + }; + fromMasterActivatedPlugins = lib.mkOption { + default = []; + description = "List of naemon plugins to activate from master"; + type = lib.types.listOf (lib.types.str); + }; + resources = lib.mkOption { + default = {}; + description = "List of additionnal resources elements"; + type = lib.types.attrsOf (lib.types.str); + }; + objects = lib.mkOption { + default = {}; + description = "Object definitions"; + type = objectsModule; + }; + fromMasterObjects = lib.mkOption { + default = {}; + description = "Object definitions of checks that should be executed from master"; + type = objectsModule; + }; + }; + }; + + imports = [ + environment.nixosModule + secrets.nixosModule + naemon.nixosModule + ]; + config = lib.mkIf cfg.enable { + myServices.monitoring.objects.command = + lib.foldr (v: o: o // (v.commands or {})) {} (builtins.attrValues mypluginsConfig); + + security.sudo.extraRules = let + pluginsSudo = lib.lists.remove null (lib.mapAttrsToList (k: v: + if (v ? 
sudo) + then ({ users = [ "naemon" ]; } // (v.sudo myplugins)) + else null) mypluginsConfig); + in pluginsSudo; + + environment.etc.cnagios.source = "${pkgs.cnagios}/share/doc/cnagios"; + environment.systemPackages = let + nagios-cli = pkgs.writeScriptBin "nagios-cli" '' + #!${pkgs.stdenv.shell} + sudo -u naemon ${pkgs.nagios-cli}/bin/nagios-cli -c ${self.nagios-cli-config} + ''; + in [ + pkgs.cnagios + nagios-cli + ]; + secrets.keys = { + "naemon/id_rsa" = { + user = "naemon"; + group = "naemon"; + permissions = "0400"; + text = config.myEnv.monitoring.ssh_secret_key; + }; + "naemon/resources.cfg".keyDependencies = [ myplugins ]; + }; + services.naemon = { + enable = true; + extraConfig = '' + use_syslog=1 + log_initial_states=1 + date_format=iso8601 + admin_email=${config.myEnv.monitoring.email} + '' + lib.optionalString (!cfg.master) '' + obsess_over_services=1 + ocsp_command=notify-master + ''; + extraResource = let + resources = [cfg.resources or {}] ++ (lib.mapAttrsToList (k: v: v.resources or {}) mypluginsConfig); + joined = lib.zipAttrsWith (n: v: if builtins.length (lib.unique v) == 1 then builtins.head v else abort "Non-unique resources names") resources; + joinedStr = builtins.concatStringsSep "\n" (lib.mapAttrsToList (k: v: "$" + "${k}$=${v}") joined); + in '' + $USER2$=${myplugins} + ${joinedStr} + ''; + objectDefs = + self.lib.toObjects cfg.objects; + }; + }; + }; + }; +} diff --git a/flakes/private/monitoring/myplugins.nix b/flakes/private/monitoring/myplugins.nix new file mode 100644 index 0000000..35730bb --- /dev/null +++ b/flakes/private/monitoring/myplugins.nix @@ -0,0 +1,400 @@ +{ sudo, pkgs, lib, config }: +let + cfg = config.myServices.monitoring; +in +{ + notify-secondary = { + resources = { + USER200 = config.myEnv.monitoring.status_url; + USER201 = config.myEnv.monitoring.status_token; + }; + commands = { + notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S 
\"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$ | $SERVICEPERFDATA$\""; + }; + chunk = '' + cp ${./plugins}/send_nrdp.sh $out + patchShebangs $out/send_nrdp.sh + wrapProgram $out/send_nrdp.sh --prefix PATH : ${lib.makeBinPath [ + pkgs.curl pkgs.jq + ]} + ''; + }; + notify-primary = { + resources = { + USER210 = config.myEnv.monitoring.apprise_urls; + }; + commands = { + # $OVE is to force naemon to run via shell instead of execve which fails here + notify-host-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; + # $OVE is to force naemon to run via shell instead of execve which fails here + notify-service-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; + notify-host-by-apprise = "HOST=\"$HOSTALIAS$\" NOTIFICATIONTYPE=\"$NOTIFICATIONTYPE$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_apprise host \"$ARG1$\""; + notify-service-by-apprise = "HOST=\"$HOSTALIAS$\" NOTIFICATIONTYPE=\"$NOTIFICATIONTYPE$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_apprise service \"$ARG1$\""; + }; + chunk = '' + cp ${./plugins}/{notify_by_email,notify_by_apprise} $out + patchShebangs $out/{notify_by_email,notify_by_apprise} + wrapProgram $out/notify_by_email --prefix PATH : ${lib.makeBinPath [ + pkgs.mailutils + ]} + wrapProgram $out/notify_by_apprise --prefix PATH : ${lib.makeBinPath [ + pkgs.apprise + ]} + ''; + }; + bandwidth = { + commands = { + check_local_bandwidth = "$USER2$/check_bandwidth -i=$ARG1$ -w $ARG2$ -c $ARG3$"; + }; + chunk = '' 
+ cp ${./plugins}/check_bandwidth $out/ + patchShebangs $out/check_bandwidth + wrapProgram $out/check_bandwidth --prefix PATH : ${lib.makeBinPath [ + pkgs.iproute pkgs.bc + ]} + ''; + }; + command = { + commands = { + check_command_match = "$USER2$/check_command -c \"$ARG1$\" -C \"$ARG2$\" $ARG3$"; + check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; + check_command_status = "$USER2$/check_command -c \"$ARG1$\" -s \"$ARG2$\" $ARG3$"; + }; + chunk = '' + cp ${./plugins}/check_command $out/ + patchShebangs $out/check_command + wrapProgram $out/check_command --prefix PATH : ${config.security.wrapperDir} + ''; + }; + dns = { + commands = { + check_dns = "$USER1$/check_dns -H $ARG1$ -s $HOSTADDRESS$ $ARG2$"; + check_external_dns = "$USER1$/check_dns -H $ARG2$ -s $ARG1$ $ARG3$"; + }; + }; + mdadm = { + commands = { + check_mdadm = "$USER2$/check_command -c \"${pkgs.mdadm}/bin/mdadm --monitor --scan -1\" -s 0 -o \"^$\" -r root"; + }; + sudo = _: { + commands = [ + { command = "${pkgs.mdadm}/bin/mdadm --monitor --scan -1"; options = [ "NOPASSWD" ]; } + ]; + runAs = "root"; + }; + }; + postfix = { + commands = { + check_mailq = "$USER1$/check_mailq -s -w 1 -c 2"; + }; + sudo = _: { + commands = [ + { command = "${pkgs.postfix}/bin/mailq"; options = [ "NOPASSWD" ]; } + ]; + runAs = "root"; + }; + }; + emails = { + resources = { + USER203 = config.secrets.fullPaths."naemon/id_rsa"; + }; + commands = { + check_emails = "$USER2$/check_emails -H $HOSTADDRESS$ -i $USER203$ -l $ARG1$ -p $ARG2$ -s $ARG3$ -f $ARG4$"; + check_emails_local = "$USER2$/check_emails -H $HOSTADDRESS$ -n $ARG1$ -r $ADMINEMAIL$ -s $ARG2$ -f $ARG3$"; + }; + chunk = let + send_mails = pkgs.runCommand "send_mails" { + buildInputs = [ pkgs.makeWrapper ]; + } '' + mkdir -p $out/bin + cp ${./send_mails} $out/bin/send_mails + patchShebangs $out + wrapProgram $out/bin/send_mails --prefix PATH : ${lib.makeBinPath [ + pkgs.mailutils + ]} + ''; + in '' + cp 
${./plugins}/check_emails $out/ + patchShebangs $out/check_emails + wrapProgram $out/check_emails --prefix PATH : ${lib.makeBinPath [ + pkgs.openssh send_mails + ]} --prefix PERL5LIB : ${pkgs.perlPackages.makePerlPath [ + pkgs.perlPackages.TimeDate + ]} + ''; + }; + eriomem = { + resources = { + USER208 = builtins.concatStringsSep "," (map (builtins.concatStringsSep ":") config.myEnv.monitoring.eriomem_keys); + }; + commands = { + check_backup_eriomem = "$USER2$/check_eriomem $USER208$"; + check_backup_eriomem_age = "$USER2$/check_backup_eriomem_age $ARG1$"; + }; + chunk = '' + cp ${./plugins}/check_eriomem $out/ + patchShebangs $out/check_eriomem + wrapProgram $out/check_eriomem --prefix PATH : ${lib.makeBinPath [ + pkgs.s3cmd pkgs.python38 + ]} + cp ${./plugins}/check_backup_age $out/check_backup_eriomem_age + patchShebangs $out/check_backup_eriomem_age + wrapProgram $out/check_backup_eriomem_age --prefix PATH : ${lib.makeBinPath [ + pkgs.duplicity + ]} --set SECRETS_PATH ${lib.optionalString cfg.master config.secrets.fullPaths."eriomem_access_key"} + ''; + }; + file_date = { + commands = { + check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\""; + }; + chunk = '' + cp ${./plugins}/check_last_file_date $out/ + patchShebangs $out/check_last_file_date + ''; + sudo = myplugins: { + commands = [ + { command = "${myplugins}/check_last_file_date /backup2/*"; options = [ "NOPASSWD" ]; } + ]; + runAs = "ALL"; + }; + }; + ftp = { + commands = { + check_ftp_database = "$USER2$/check_ftp_database"; + }; + chunk = '' + cp ${./plugins}/check_ftp_database $out/ + patchShebangs $out/check_ftp_database + wrapProgram $out/check_ftp_database --prefix PATH : ${lib.makeBinPath [ + pkgs.lftp + ]} + ''; + }; + git = { + resources = { + USER203 = config.secrets.fullPaths."naemon/id_rsa"; + }; + commands = { + check_git = "$USER2$/check_git $USER203$"; + }; + chunk = '' + cp ${./plugins}/check_git $out/ + patchShebangs $out/check_git + 
wrapProgram $out/check_git --prefix PATH : ${lib.makeBinPath [ + pkgs.git pkgs.openssh + ]} + ''; + }; + http = { + resources = { + USER202 = config.myEnv.monitoring.http_user_password; + }; + commands = { + check_http = "$USER1$/check_http --sni -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\""; + check_https = "$USER1$/check_http --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\""; + check_https_4 = "$USER1$/check_http -4 --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\""; + check_https_6 = "$USER1$/check_http -6 --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\""; + check_https_auth = "$USER1$/check_http --sni --ssl -a \"$USER202$\" -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\""; + check_https_certificate = "$USER1$/check_http --sni --ssl -H \"$ARG1$\" -C 21,15"; + check_https_code = "$USER1$/check_http --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -e \"$ARG3$\" -r \"$ARG4$\""; + }; + }; + imap = { + resources = { + USER204 = config.myEnv.monitoring.imap_login; + USER205 = config.myEnv.monitoring.imap_password; + }; + commands = { + check_imap_connection = "$USER2$/check_imap_connection -u \"$USER204$\" -p \"$USER205$\" -H \"imap.immae.eu:143\""; + }; + chunk = '' + cp ${./plugins}/check_imap_connection $out/ + patchShebangs $out/check_imap_connection + wrapProgram $out/check_imap_connection --prefix PATH : ${lib.makeBinPath [ + pkgs.openssl + ]} + ''; + }; + megaraid = let + megacli = pkgs.megacli.overrideAttrs(old: { meta = old.meta // { license = null; }; }); + in { + commands = { + check_megaraid = "$USER2$/check_megaraid_sas --sudo"; + }; + chunk = let + megaCliPlugin = pkgs.runCommand "megaCliPlugin" { + plugin = pkgs.fetchurl { + name = "check_megaraid_sas"; + url = "https://exchange.nagios.org/components/com_mtree/attachment.php?link_id=6381&cf_id=24"; + sha256 = "0yf60p4c0hb4q3fng9fc14qc89bqm0f1sijayzygadaqcl44jx4p"; + }; + } '' + mkdir $out + cp $plugin $out/check_megaraid_sas 
+ chmod +x $out/check_megaraid_sas + patchShebangs $out + substituteInPlace $out/check_megaraid_sas --replace /usr/sbin/MegaCli ${megacli}/bin/MegaCli64 + substituteInPlace $out/check_megaraid_sas --replace 'sudo $megacli' '${sudo} $megacli' + sed -i -e "s/use utils qw(%ERRORS);/my %ERRORS = ('OK' => 0, 'WARNING' => 1, 'CRITICAL' => 2, 'UNKNOWN' => 3);/" $out/check_megaraid_sas + ''; + in '' + cp ${megaCliPlugin}/check_megaraid_sas $out/ + patchShebangs $out/check_megaraid_sas + ''; + sudo = _: { + commands = [ + { command = "${megacli}/bin/MegaCli64"; options = [ "NOPASSWD" ]; } + ]; + runAs = "root"; + }; + }; + memory = { + commands = { + check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$"; + }; + chunk = '' + cp ${./plugins}/check_mem.sh $out/ + patchShebangs $out/check_mem.sh + wrapProgram $out/check_mem.sh --prefix PATH : ${lib.makeBinPath [ + pkgs.gnugrep pkgs.gawk pkgs.procps + ]} + ''; + }; + mysql = { + commands = { + check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\""; + }; + chunk = '' + cp ${./plugins}/check_mysql_replication $out/ + patchShebangs $out/check_mysql_replication + wrapProgram $out/check_mysql_replication --prefix PATH : ${lib.makeBinPath [ + pkgs.gnugrep pkgs.gnused pkgs.coreutils pkgs.mariadb + ]} + ''; + sudo = myplugins: { + commands = [ + { command = "${myplugins}/check_mysql_replication *"; options = [ "NOPASSWD" ]; } + ]; + runAs = "mysql"; + }; + }; + openldap = { + commands = { + check_openldap_replication = "${sudo} -u openldap $USER2$/check_openldap_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\" \"$ARG4$\" \"$ARG5$\""; + }; + chunk = '' + cp ${./plugins}/check_openldap_replication $out/ + patchShebangs $out/check_openldap_replication + wrapProgram $out/check_openldap_replication --prefix PATH : ${lib.makeBinPath [ + pkgs.gnugrep pkgs.gnused pkgs.coreutils pkgs.openldap + ]} + ''; + sudo = myplugins: { + commands = [ + { command = "${myplugins}/check_openldap_replication *"; 
options = [ "NOPASSWD" ]; } + ]; + runAs = "openldap"; + }; + }; + ovh = { + resources = { + USER209 = builtins.concatStringsSep "," [ + config.myEnv.monitoring.ovh_sms.endpoint + config.myEnv.monitoring.ovh_sms.application_key + config.myEnv.monitoring.ovh_sms.application_secret + config.myEnv.monitoring.ovh_sms.consumer_key + config.myEnv.monitoring.ovh_sms.account + ]; + }; + commands = { + check_backup_ovh_age = "$USER2$/check_backup_ovh_age $ARG1$"; + check_ovh_sms = "$USER2$/check_ovh_sms \"$USER209$\""; + }; + chunk = '' + cp ${./plugins}/check_backup_age $out/check_backup_ovh_age + patchShebangs $out/check_backup_ovh_age + wrapProgram $out/check_backup_ovh_age --prefix PATH : ${lib.makeBinPath [ + pkgs.duplicity + ]} --set SECRETS_PATH ${lib.optionalString cfg.master config.secrets.fullPaths."ovh_access_key"} + cp ${./plugins}/check_ovh_sms $out/ + patchShebangs $out/check_ovh_sms + wrapProgram $out/check_ovh_sms --prefix PATH : ${lib.makeBinPath [ + (pkgs.python38.withPackages (ps: [ps.ovh])) + ]} + ''; + }; + postgresql = { package }: { + commands = { + check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\""; + check_postgresql_database_count = "$USER2$/check_postgres_database_count \"$ARG1$\" \"$ARG2$\" \"$ARG3$\""; + }; + chunk = '' + cp ${./plugins}/check_postgres_replication $out/ + patchShebangs $out/check_postgres_replication + wrapProgram $out/check_postgres_replication --prefix PATH : ${lib.makeBinPath [ + package + ]} + cp ${./plugins}/check_postgres_database_count $out/ + patchShebangs $out/check_postgres_database_count + wrapProgram $out/check_postgres_database_count --prefix PATH : ${lib.makeBinPath [ + package + ]} + ''; + + sudo = myplugins: { + commands = [ + { command = "${myplugins}/check_postgres_replication *"; options = [ "NOPASSWD" ]; } + ]; + runAs = "postgres"; + }; + }; + redis = { + commands = { + check_redis_replication = "${sudo} -u redis 
$USER2$/check_redis_replication \"$ARG1$\""; + }; + chunk = '' + cp ${./plugins}/check_redis_replication $out/ + patchShebangs $out/check_redis_replication + wrapProgram $out/check_redis_replication --prefix PATH : ${lib.makeBinPath [ + pkgs.gnugrep pkgs.coreutils pkgs.redis + ]} + ''; + sudo = myplugins: { + commands = [ + { command = "${myplugins}/check_redis_replication *"; options = [ "NOPASSWD" ]; } + ]; + runAs = "redis"; + }; + }; + tcp = { + commands = { + check_tcp = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -e \"$ARG2$\" -Mcrit"; + check_tcp_ssl = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -S -D 21,15"; + }; + }; + zfs = { + commands = { + check_zfs = "$USER2$/check_zpool.sh -p ALL -w 80 -c 90"; + check_zfs_snapshot = "$USER2$/check_zfs_snapshot -d $ARG1$ -c 18000 -w 14400"; + }; + chunk = let + zfsPlugin = pkgs.fetchurl { + url = "https://www.claudiokuenzler.com/monitoring-plugins/check_zpools.sh"; + sha256 = "0p9ms9340in80jkds4kfspw62xnzsv5s7ni9m28kxyd0bnzkbzhf"; + }; + in '' + cp ${zfsPlugin} $out/check_zpool.sh + chmod +x $out/check_zpool.sh + patchShebangs $out/check_zpool.sh + wrapProgram $out/check_zpool.sh --prefix PATH : ${lib.makeBinPath [ + pkgs.which pkgs.zfs pkgs.gawk + ]} + cp ${./plugins}/check_zfs_snapshot $out + patchShebangs $out/check_zfs_snapshot + wrapProgram $out/check_zfs_snapshot --prefix PATH : ${lib.makeBinPath [ + pkgs.zfs pkgs.coreutils pkgs.gawk pkgs.gnugrep + ]} + ''; + }; +} diff --git a/flakes/private/monitoring/nagios-cli.cfg b/flakes/private/monitoring/nagios-cli.cfg new file mode 100644 index 0000000..7bd30cb --- /dev/null +++ b/flakes/private/monitoring/nagios-cli.cfg @@ -0,0 +1,68 @@ +# -*- coding: utf-8; -*- + +[cli] +history = /var/lib/naemon/nagios_cli_history + +[ui] +color = 1 +prompt = naemon %s> +prompt_separator = " → " + +[nagios] +log = /var/log/naemon +command_file = /run/naemon/naemon.cmd +log_file = %(log)s/naemon.log +object_cache_file = /var/lib/naemon/objects.cache +status_file = 
/var/lib/naemon/status.dat + +[object] +host.status = + host_name + current_state + plugin_output + is_flapping + last_check + last_time_down + last_state_change + check_period + notification_period + current_attempt + max_attempts +service.status = + host_name + service_description + current_state + is_flapping + plugin_output + last_time_down + last_state_change + last_check + next_check + check_interval + check_latency + check_period + notification_period + current_attempt + max_attempts + +[string] +level.ok = ↑ OK +level.warning = ! WARNING +level.critical = ↓ CRITICAL +level.unknown = ↕ UNKNOWN + +[color] +error = bold_red + +prompt = normal +prompt.object = bold + +host.host_name = bold +host.plugin_output = bold +service.plugin_output = bold + +level.ok = bold_green +level.warning = bold_yellow +level.critical = bold_red +level.unknown = bold_magenta + diff --git a/flakes/private/monitoring/objects_common.nix b/flakes/private/monitoring/objects_common.nix new file mode 100644 index 0000000..1da7764 --- /dev/null +++ b/flakes/private/monitoring/objects_common.nix @@ -0,0 +1,227 @@ +{ hostFQDN +, hostName +, interface ? "eth0" +, processWarn ? "250" +, processAlert ? "400" +, loadWarn ? "0.9" +, load5Warn ? loadWarn +, load15Warn ? load5Warn +, loadAlert ? "1.0" +, load5Alert ? loadAlert +, load15Alert ? load5Alert +, master +, ... 
+}: +{ + host = { + "${hostFQDN}" = { + alias = hostFQDN; + address = hostFQDN; + use = "linux-server"; + hostgroups = "webstatus-hosts"; + _webstatus_name = hostName; + _webstatus_vhost = "status.immae.eu"; + }; + }; + service = [ + { + service_description = "Size on root partition"; + use = "local-service"; + check_command = ["check_local_disk" "20%" "10%" "/"]; + __passive_servicegroups = "webstatus-resources"; + } + { + service_description = "Total number of process"; + use = "local-service"; + check_command = [ + "check_local_procs" + processWarn + processAlert + "RSZDT" + ]; + __passive_servicegroups = "webstatus-resources"; + } + { + service_description = "Network bandwidth"; + use = "local-service"; + check_interval = 2; + max_check_attempts = "20"; + retry_interval = "2"; + check_command = [ + "check_local_bandwidth" + interface + "20480" # kb/s + "51200" # kb/s + ]; + __passive_servicegroups = "webstatus-resources"; + } + { + service_description = "Average load"; + use = "local-service"; + check_command = [ + "check_local_load" + "${loadWarn},${load5Warn},${load15Warn}" + "${loadAlert},${load5Alert},${load15Alert}" + ]; + __passive_servicegroups = "webstatus-resources"; + } + { + service_description = "Swap usage"; + use = "local-service"; + check_command = ["check_local_swap" "20" "10"]; + __passive_servicegroups = "webstatus-resources"; + } + { + service_description = "Memory usage"; + use = "local-service"; + check_command = ["check_memory" "80" "90"]; + __passive_servicegroups = "webstatus-resources"; + } + { + service_description = "NTP is activated and working"; + use = "local-service"; + check_command = ["check_ntp"]; + __passive_servicegroups = "webstatus-resources"; + } + ]; + command = { + check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$"; + check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$"; + check_local_load = "$USER1$/check_load -r -w $ARG1$ -c $ARG2$"; + check_local_swap = "$USER1$/check_swap -n ok 
-w $ARG1$ -c $ARG2$"; + check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org"; + check_smtp = "$USER1$/check_smtp -H $HOSTADDRESS$ -p 25 -S -D 21,15"; + + check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5"; + check_ok = "$USER1$/check_dummy 0 \"Dummy OK\""; + check_critical = "$USER1$/check_dummy 2 \"Dummy CRITICAL\""; + }; + timeperiod = { + "24x7" = { + alias = "24 Hours A Day, 7 Days A Week"; + monday = "00:00-24:00"; + tuesday = "00:00-24:00"; + wednesday = "00:00-24:00"; + thursday = "00:00-24:00"; + friday = "00:00-24:00"; + saturday = "00:00-24:00"; + sunday = "00:00-24:00"; + }; + }; + servicegroup = { + webstatus-webapps = { alias = "Web applications"; }; + webstatus-websites = { alias = "Personal websites"; }; + webstatus-ssl = { alias = "SSL certificates"; }; + webstatus-dns = { alias = "DNS resolution"; }; + webstatus-remote-services = { alias = "Other remote services"; }; + webstatus-local-services = { alias = "Other local services"; }; + webstatus-email = { alias = "E-mail services"; }; + webstatus-resources = { alias = "Local resources"; }; + webstatus-databases = { alias = "Databases resources"; }; + webstatus-backup = { alias = "Backup resources"; }; + }; + hostgroup = { + webstatus-hosts = { alias = "Hosts"; }; + }; + contactgroup = { + admins = { alias = "Naemon Administrators"; }; + }; + templates = { + service = { + generic-service = { + active_checks_enabled = "1"; + check_freshness = "0"; + check_interval = 10; + check_period = "24x7"; + contact_groups = "admins"; + event_handler_enabled = "1"; + flap_detection_enabled = "1"; + is_volatile = "0"; + max_check_attempts = "3"; + notification_interval = "60"; + notification_options = "w,u,c,r,f,s"; + notification_period = "24x7"; + notifications_enabled = if master then "1" else "0"; + obsess_over_service = "1"; + passive_checks_enabled = "1"; + process_perf_data = "1"; + retain_nonstatus_information = "1"; + retain_status_information = "1"; 
+ retry_interval = "2"; + _webstatus_namespace = "immae"; + }; + local-service = { + use = "generic-service"; + host_name = hostFQDN; + check_interval = 5; + max_check_attempts = "4"; + retry_interval = "1"; + servicegroups = "webstatus-resources"; + }; + external-service = { + use = "generic-service"; + check_interval = 5; + max_check_attempts = "4"; + retry_interval = "1"; + }; + web-service = { + use = "generic-service"; + check_interval = 20; + max_check_attempts = "2"; + retry_interval = "1"; + }; + external-web-service = { + use = "generic-service"; + check_interval = 20; + max_check_attempts = "2"; + retry_interval = "1"; + }; + mail-service = { + use = "generic-service"; + check_interval = 15; + max_check_attempts = "1"; + retry_interval = "1"; + }; + dns-service = { + use = "generic-service"; + check_interval = 120; + notification_interval = "120"; + max_check_attempts = "5"; + retry_interval = "5"; + }; + }; + # No contact, we go through master + contact = { + generic-contact = { + host_notification_commands = "notify-host-by-email"; + host_notification_options = "d,u,r,f,s"; + host_notification_period = "24x7"; + service_notification_commands = "notify-service-by-email"; + service_notification_options = "w,u,c,r,f,s"; + service_notification_period = "24x7"; + }; + }; + host = { + generic-host = { + event_handler_enabled = "1"; + flap_detection_enabled = "1"; + notification_period = "24x7"; + notifications_enabled = "1"; + process_perf_data = "1"; + retain_nonstatus_information = "1"; + retain_status_information = "1"; + }; + linux-server = { + check_command = "check_host_alive"; + check_interval = "5"; + check_period = "24x7"; + contact_groups = "admins"; + max_check_attempts = "10"; + notification_interval = "120"; + notification_options = "d,u,r,f"; + retry_interval = "1"; + _webstatus_namespace = "immae"; + }; + }; + }; +} diff --git a/flakes/private/monitoring/plugins/check_backup_age b/flakes/private/monitoring/plugins/check_backup_age new file mode 
100755 index 0000000..d873bdc --- /dev/null +++ b/flakes/private/monitoring/plugins/check_backup_age @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +set -euo pipefail + +source $SECRETS_PATH +export HOME=$(mktemp -d) + +trap "rm -rf $HOME" EXIT +folder=$1 + +parse_date() { + d=$1 + echo $d | sed -e "s/^\(....\)\(..\)\(..\)T\(..\)\(..\)\(..\)/\1-\2-\3T\4:\5:\6/" +} + +output=$(duplicity collection-status --log-fd 2 "$BASE_URL$folder" 2>&1 > /dev/null) + +output=$(echo "$output" | grep -v "^\.") + +last_full=$(parse_date "$(echo "$output" | grep "^ full " | cut -d' ' -f3 | sort | tail -n1)") +last_bkp=$(parse_date "$(echo "$output" | grep -E "^ (full|inc) " | cut -d' ' -f3 | sort | tail -n1)") +orphaned_sets=$(echo "$output" | grep "^orphaned-sets-num" | cut -d' ' -f2) +incomplete_sets=$(echo "$output" | grep "^incomplete-sets-num" | cut -d' ' -f2) + +if [[ -z "$last_full" || -z "$last_bkp" || -z "$orphaned_sets" || -z "$incomplete_sets" ]]; then + echo "duply-backup $folder UNKNOWN - impossible to parse result" + exit 3 +fi + +last_full_age=$(( ($(date "+%s") - $(date -d "$last_full" "+%s")) / (60*60*24) )) +last_bkp_age=$(( ($(date "+%s") - $(date -d "$last_bkp" "+%s")) / (60*60) )) + +PERFS="orphan=$orphaned_sets;1;;0; incomplete=$incomplete_sets;1;;0; age=${last_bkp_age}h;30;48;0; full_age=${last_full_age}d;35;45;0;" + + +WARNINGS="" +ERRORS="" +if [[ "$incomplete_sets" -gt 0 ]]; then + WARNINGS="$WARNINGS - Incomplete sets is $incomplete_sets" +fi + +if [[ "$orphaned_sets" -gt 0 ]]; then + WARNINGS="$WARNINGS - Orphaned sets is $orphaned_sets" +fi + +if [[ "$last_full_age" -gt 45 ]]; then + ERRORS="$ERRORS - Last full backup is too old $last_full" +elif [[ "$last_full_age" -gt 35 ]]; then + WARNINGS="$WARNINGS - Last full backup is getting old $last_full" +fi + +if [[ "$last_bkp_age" -gt 48 ]]; then + ERRORS="$ERRORS - Last backup is too old $last_bkp" +elif [[ "$last_bkp_age" -gt 30 ]]; then + WARNINGS="$WARNINGS - Last backup is getting old $last_bkp" +fi + +if [[ -n 
"$ERRORS" ]]; then + echo "duply-backup $folder CRITICAL$ERRORS$WARNINGS | $PERFS" + exit 2 +elif [[ -n "$WARNINGS" ]]; then + echo "duply-backup $folder WARNING$WARNINGS | $PERFS" + exit 1 +else + echo "duply-backup $folder OK | $PERFS" +fi diff --git a/flakes/private/monitoring/plugins/check_bandwidth b/flakes/private/monitoring/plugins/check_bandwidth new file mode 100755 index 0000000..21d01f5 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_bandwidth @@ -0,0 +1,122 @@ +#!/bin/bash + +# ============================== SUMMARY ===================================== +#Author : Ken Roulamellah +#Date : 19/07/2018 +#Version : 1.0 +# Licence : GPL +# ===================== INFORMATION ABOUT THIS PLUGIN ======================== +# +# This plugin checks the average RX and TX bandwidth utilisation. It use +# kbytes as measure unite. +# +# ========================== START OF PROGRAM CODE =========================== + +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 + +interface=$( ip route | grep default | awk '{print $5}' | head -n1) +function print_usage() +{ + echo "Usage :" + echo "$0 [ -i=INTERFACE] [ -ct=COUNT ] -w WARNING -c CRITICAL" + echo "This script calculate the average bandwith usage." 
+ echo "Default values | interface: ${interface}, counter: 10" +} + +counter=10 +warning=-1 +critical=-1 + +sum_rx=0 +sum_tx=0 +avg_rx= +avg_tx= +i= + + +if [[ $# -lt 4 ]]; +then + echo "Error: Arguments are missing" + print_usage + exit $STATE_UNKNOWN +fi + +while [[ $# -gt 0 ]]; do + case "$1" in + -i=*) + interface="$(cut -d'=' -f2 <<<"$1")" + shift + ;; + -ct=*) + counter="$(cut -d'=' -f2 <<<"$1")" + shift + ;; + -w) + warning=$2 + shift 2 + ;; + -c) + critical=$2 + shift 2 + ;; + *) + printf "\nError: Invalid option '$1'" + print_usage + exit $STATE_UNKNOWN + ;; + esac +done + +if [ $warning -lt 0 ] || [ $critical -lt 0 ]; +then + echo "Error: You need to specify a warning and critical treshold" + print_usage + exit $STATE_UNKNOWN +fi + +grep -q "up" /sys/class/net/$interface/operstate || exec echo "$interface: no such device or down" + +read rx <"/sys/class/net/$interface/statistics/rx_bytes" +read tx <"/sys/class/net/$interface/statistics/tx_bytes" + +i=$counter +while [ $i -gt 0 ]; do + sleep 1 + read newrx <"/sys/class/net/$interface/statistics/rx_bytes" + read newtx <"/sys/class/net/$interface/statistics/tx_bytes" + + #echo "old rx :$rx" + #echo "new rx :$newrx" + rx_cal=$(bc <<< "scale=2; ($newrx-$rx) / 1000") + tx_cal=$(bc <<< "scale=2; ($newtx-$tx) / 1000") + + sum_rx=$(bc <<< "scale=2;$sum_rx+$rx_cal") + sum_tx=$(bc <<< "scale=2;$sum_tx+$tx_cal") + + #echo "$interface {rx: $rx_cal ko/s, tx: $tx_cal ko/s}" + rx=$newrx + tx=$newtx + ((i --)) +done + +avg_rx=$(bc <<< "scale=2;$sum_rx/$counter") +avg_tx=$(bc <<< "scale=2;$sum_tx/$counter") + +#echo "$avg_rx" +#echo "$avg_tx" + + +if [ $(bc <<< "$avg_rx > $critical || $avg_tx > $critical") -eq 1 ]; then + echo "$interface CRITICAL - AVG_RX: $avg_rx kb/s, AVG_TX: $avg_tx kb/s | RX="$avg_rx"kbps;0;0;0; TX="$avg_tx"kbps;0;0;0;" + exit $STATE_CRITICAL +elif [ $(bc <<< "$avg_rx > $warning || $avg_tx > $warning") -eq 1 ]; then + echo "$interface WARNING - AVG_RX: $avg_rx kb/s, AVG_TX: $avg_tx kb/s | 
RX="$avg_rx"kbps;0;0;0; TX="$avg_tx"kbps;0;0;0;" + exit $STATE_WARNING +else + echo "$interface - OK AVG_RX: $avg_rx kb/s, AVG_TX: $avg_tx kb/s | RX="$avg_rx"kbps;0;0;0; TX="$avg_tx"kbps;0;0;0;" + exit $STATE_OK +fi +exit 3 diff --git a/flakes/private/monitoring/plugins/check_command b/flakes/private/monitoring/plugins/check_command new file mode 100755 index 0000000..2b546c1 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_command @@ -0,0 +1,113 @@ +#!/usr/bin/env perl + +use strict; +use Getopt::Std; +$| = 1; + +my %opts; +getopts('hr:C:c:s:o:', \%opts); + +my $STATE_OK = 0; +my $STATE_WARNING = 1; +my $STATE_CRITICAL = 2; +my $STATE_UNKNOWN = 3; + +if ($opts{'h'} || scalar(%opts) == 0) { + &print_help(); + exit($STATE_OK); +} + +my $command = $opts{'c'}; +if ($command eq '') { + print "You must provide a command to check.\n"; + exit($STATE_UNKNOWN); +} + +my $expected_output = $opts{'o'}; +my $expected_status = $opts{'s'}; +my $other_command = $opts{'C'}; + +if ($other_command eq '' and $expected_status eq '' and $expected_output eq '') { + $expected_status = 0; +} + +my $cmd = $command . ' 2>&1'; +my $other_cmd; +if ($other_command ne '') { + $other_cmd = $other_command . ' 2>&1'; +} + +my $run_as; +if ($opts{'r'}) { + $run_as = $opts{'r'}; + $cmd = "sudo -u $run_as -n $cmd"; + + if ($other_command ne '') { + $other_cmd = "sudo -u $run_as -n $other_cmd"; + } + +} + +my $cmd_result = `$cmd`; my $cmd_status = ($? & 127) ? -1 : ($? >> 8); # real exit code (-1 if killed or not started), saved before the -C command overwrites $? +my $other_cmd_result; +if ($other_command ne '') { + $other_cmd_result = `$other_cmd`; + chomp($other_cmd_result); +} + +chomp($cmd_result); +if ($cmd_result =~ /sudo/i) { + print "$command UNKNOWN - No sudo right to run the command | result=1;;;;\n"; + exit($STATE_UNKNOWN); +} elsif ($expected_status ne '') { + if ($cmd_status != $expected_status) { + print "$command CRITICAL - Response status $cmd_status | result=1;;;;\n"; + exit($STATE_CRITICAL); + } else { + print "$command OK - Response status $cmd_status 
| result=0;;;;\n"; + exit($STATE_OK); + } +} elsif ($other_command ne '') { + if ($cmd_result ne $other_cmd_result) { + print "$command CRITICAL - Expected output not matching other command output | result=1;;;;\n"; + exit($STATE_CRITICAL); + } else { + print "$command OK - Expected output matching other command output | result=0;;;;\n"; + exit($STATE_OK); + } +} else { + if ($cmd_result !~ /$expected_output/) { + print "$command CRITICAL - Expected output not matching | result=1;;;;\n"; + exit($STATE_CRITICAL); + } else { + print "$command OK - Expected output matching | result=0;;;;\n"; + exit($STATE_OK); + } +} + +sub print_help() { + print << "EOF"; +Check whether the given command responds as expected. One of -o -C or -s must be selected. + +Options: +-h + Print detailed help screen + +-c + command to run (required) + +-C + other command to compare output + +-r user + Run as user via sudo. + +-s + status code to check + +-o + output to check + +EOF +} + diff --git a/flakes/private/monitoring/plugins/check_emails b/flakes/private/monitoring/plugins/check_emails new file mode 100755 index 0000000..534e5a5 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_emails @@ -0,0 +1,121 @@ +#!/usr/bin/env perl + +use strict; +use Getopt::Std; +use File::Basename; +use Date::Parse; +use POSIX qw(strftime); + +$| = 1; + +my %opts; +getopts('hH:l:s:p:f:i:n:r:', \%opts); + +my $STATE_OK = 0; +my $STATE_WARNING = 1; +my $STATE_CRITICAL = 2; +my $STATE_UNKNOWN = 3; + +if ($opts{'h'} || scalar(%opts) == 0) { + &print_help(); + exit($STATE_OK); +} + +my $port = $opts{'p'}; +my $host = $opts{'H'}; +my $login = $opts{'l'}; +if ($login ne '') { + $login = "$login@"; +} + +my $identity = $opts{'i'}; +my $local_directory = $opts{'n'}; +my $return_path = $opts{'r'}; + +my @emails_to_send = split(/,/, $opts{'s'}); +my @emails_to_expect = split(/,/, $opts{'f'}); + +my $cmd_result; +if ($local_directory ne '') { + if (@emails_to_expect and ! 
-d $local_directory) { + print "Emails $host UNKNOWN - Could not find local directory"; + exit($STATE_UNKNOWN); + } + $cmd_result = `send_mails $local_directory $return_path @emails_to_send 2>&1`; +} else { + $cmd_result = `ssh -o BatchMode=yes -o UserKnownHostsFile=/dev/null -o CheckHostIP=no -o StrictHostKeyChecking=no -p $port -i $identity $login$host send_mails @emails_to_send 2>&1`; + + if ($cmd_result =~ /Host key verification failed./) { + print "Emails $host UNKNOWN - Could not connect to host with ssh key\n"; + exit($STATE_UNKNOWN); + } +} + +my @lines = split(/\n/, $cmd_result); + +my %found_emails; + +foreach my $line (@lines) { + my @split_line = split(/;/, $line, 2); + $found_emails{$split_line[0]} = $split_line[1]; +} + +my $output = ""; +my $old = 0; +foreach my $email_from (@emails_to_expect) { + my @email_split = split(/:/, $email_from); + my $email = $email_split[0]; + my $from = $email_split[1]; + + if ( exists $found_emails{$email} ) { + my $email_date = str2time($found_emails{$email}); + my $current_date = strftime "%s", localtime; + + if ($current_date - $email_date > 60*30) { + $output = "$output$email ($found_emails{$email} from $from) "; + } + $old = ($current_date - $email_date) > $old ? ($current_date - $email_date) : $old; + } else { + $output = "$output$email (missing) " + } +} + +if ($output ne '') { + print "Emails $host CRITICAL - expecting emails: $output | timestamp=${old}s;;;;\n"; + exit($STATE_CRITICAL); +} else { + print "Emails $host OK | timestamp=${old}s;;;;\n"; + exit($STATE_OK); +} + +sub print_help() { + print << "EOF"; +Check sent emails + +Options: +-h + Print detailed help screen + +-H + Host to check + +-l + Login + +-i + Identity file + +-n + Don’t use ssh, pass that directory to script + +-r + Return path for local e-mails + +-s + Comma separated list of emails to send from the host. + +-f + Comma separated list of emails to expect on the host. 
+EOF +} + diff --git a/flakes/private/monitoring/plugins/check_eriomem b/flakes/private/monitoring/plugins/check_eriomem new file mode 100755 index 0000000..880b88a --- /dev/null +++ b/flakes/private/monitoring/plugins/check_eriomem @@ -0,0 +1,83 @@ +#!/usr/bin/env python +import os +import sys +import getopt +import signal +from subprocess import Popen, PIPE + +STATE_OK = 0 +STATE_WARNING = 1 +STATE_CRITICAL = 2 +STATE_UNKNOWN = 3 + +keys = sys.argv[1].split(",") + +def to_args(k): + access, secret = k.split(":", 1) + return [ + "s3cmd", + '-c=/dev/null', + '--no-check-certificate', + '--access_key={}'.format(access), + '--secret_key={}'.format(secret), + '--host=e.eriomem.net', + '--host-bucket=%(bucket)s.e.eriomem.net', + 'du' + ] + +max_size = 1024*1024*1024*1024 +warning_percent = 99.75 +critical_percent = 99.95 + +def output(code, msg): + print(msg) + sys.exit(code) + +def main(): + def handler(signum, frame): + raise IOError + signal.signal(signal.SIGALRM, handler) + signal.alarm(60) + + try: + ps = [Popen(to_args(a), stdout=PIPE, stderr=PIPE) for a in keys] + outs = [p.communicate() for p in ps] + rets = [p.wait() for p in ps] + except IOError: + for p in ps: + os.kill(p.pid, signal.SIGTERM) + output(STATE_UNKNOWN, + "Eriomem UNKNOWN - Command timeout after 60 seconds!") + + signal.alarm(0) + + if sum(rets) == 0: + usages = [int(out[0].decode().split("\n")[-2].split()[0]) for out in outs] + usage = sum(usages) + use_percent = 100 * usage / max_size + if use_percent > critical_percent: + output(STATE_CRITICAL, + "Eriomem CRITICAL - bucket usage: %s (%s%%);| size=%s;;;;" % + (sizeof_fmt(usage), use_percent, sizeof_fmt(usage))) + elif use_percent > warning_percent: + output(STATE_WARNING, + "Eriomem WARNING - bucket usage: %s (%s%%);| size=%s;;;;" % + (sizeof_fmt(usage), use_percent, sizeof_fmt(usage))) + else: + output(STATE_OK, + "Eriomem OK - bucket usage: %s (%d%%);| size=%s;;;;" % + (sizeof_fmt(usage), use_percent, sizeof_fmt(usage))) + else: + messages = 
"\n".join([out[0].decode() + out[1].decode() for out in outs]) + output(STATE_UNKNOWN, + "Eriomem UNKNOWN - Error in command") + +def sizeof_fmt(num): + for unit in ['','ko','Mo','Go','To','Po','Eo','Zo']: + if abs(num) < 1024.0: + return "%3.1f%s" % (num, unit) + num /= 1024.0 + return "%.1f%s%s" % (num, 'Yo') + +if __name__ == '__main__': + main() diff --git a/flakes/private/monitoring/plugins/check_ftp_database b/flakes/private/monitoring/plugins/check_ftp_database new file mode 100755 index 0000000..f9cf579 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_ftp_database @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +OUT=$(echo "ls" | lftp -u test_ftp,test_ftp eldiron.immae.eu | grep it_works | wc -l) + +if [ "$OUT" -eq 1 ]; then + echo "ftp connection OK - access to ftp is working | ftp=1;;;;" + exit 0 +else + echo "ftp connection CRITICAL - no access to ftp | ftp=0;;;;" + exit 2 +fi diff --git a/flakes/private/monitoring/plugins/check_git b/flakes/private/monitoring/plugins/check_git new file mode 100755 index 0000000..e8fbb29 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_git @@ -0,0 +1,81 @@ +#!/usr/bin/env bash + +SSH_KEY="$1" + +TMPDIR=$(mktemp -d) + +if [ ! -d "$TMPDIR" ]; then + echo "gitolite UNKNOWN - impossible to create temp dir" + exit 3 +fi + +trap "rm -rf $TMPDIR" EXIT + +ERRORS="" +OUTPUT="" +PERFS="" + +cd "$TMPDIR" +OUT=$(git clone -q git://git.immae.eu/perso/Immae/Projets/Ruby/Monitor.git 2>&1) +ERR=$? +if [ -n "$OUT" ]; then +OUTPUT="$OUTPUT +$OUT" +fi +if [ "$ERR" != 0 ]; then + PERFS="$PERFS git=0;;;;" + ERRORS="$ERRORS git://" +else + PERFS="$PERFS git=1;;;;" +fi +rm -rf Monitor + +OUT=$(git clone -q http://git.immae.eu/perso/Immae/Projets/Ruby/Monitor.git 2>&1) +ERR=$? 
+if [ -n "$OUT" ]; then +OUTPUT="$OUTPUT +$OUT" +fi +if [ "$ERR" != 0 ]; then + ERRORS="$ERRORS http://" + PERFS="$PERFS http=0;;;;" +else + PERFS="$PERFS http=1;;;;" +fi +rm -rf Monitor + +OUT=$(git clone -q https://git.immae.eu/perso/Immae/Projets/Ruby/Monitor.git 2>&1) +ERR=$? +if [ -n "$OUT" ]; then +OUTPUT="$OUTPUT +$OUT" +fi +if [ "$ERR" != 0 ]; then + ERRORS="$ERRORS https://" + PERFS="$PERFS https=0;;;;" +else + PERFS="$PERFS https=1;;;;" +fi +rm -rf Monitor + +OUT=$(GIT_SSH_COMMAND="ssh -i $SSH_KEY -o BatchMode=yes -o UserKnownHostsFile=/dev/null -o CheckHostIP=no -o StrictHostKeyChecking=no" git clone -q gitolite@git.immae.eu:perso/Immae/Projets/Ruby/Monitor 2>&1) +ERR=$? +if [ -n "$OUT" ]; then +OUTPUT="$OUTPUT +$OUT" +fi +if [ "$ERR" != 0 ]; then + ERRORS="$ERRORS ssh" + PERFS="$PERFS ssh=0;;;;" +else + PERFS="$PERFS ssh=1;;;;" +fi +rm -rf Monitor + +if [ -n "$ERRORS" ]; then + echo "gitolite CRITICAL - impossible to clone via$ERRORS | $PERFS" + exit 2 +else + echo "gitolite OK - ssh, git, http and https work | $PERFS" + exit 0 +fi diff --git a/flakes/private/monitoring/plugins/check_imap_connection b/flakes/private/monitoring/plugins/check_imap_connection new file mode 100755 index 0000000..c1ab0dd --- /dev/null +++ b/flakes/private/monitoring/plugins/check_imap_connection @@ -0,0 +1,52 @@ +#!/usr/bin/env perl + +use strict; +use Getopt::Std; +$| = 1; + +my %opts; +getopts('h:u:p:H:', \%opts); + +my $STATE_OK = 0; +my $STATE_WARNING = 1; +my $STATE_CRITICAL = 2; +my $STATE_UNKNOWN = 3; + +if ($opts{'h'} || !$opts{'u'} || !$opts{'p'} || !$opts{'H'}) { + &print_help(); + exit($STATE_UNKNOWN); +} + +my $user = $opts{'u'}; +my $password = $opts{'p'}; +my $host = $opts{'H'}; + +my $cmd_result = `(echo "a login $user $password"; echo "b logout") | openssl s_client -quiet -ign_eof -connect $host -starttls imap 2>&1`; +my $expected_result = "a OK Logged in"; + +chomp($cmd_result); +if ($cmd_result !~ /$expected_result/) { + print "IMAP CRITICAL - Unable to 
connect via imaps | imap=0;;;;\n"; + exit($STATE_CRITICAL); +} else { + print "IMAP OK - imaps connected successfully | imap=1;;;;\n"; + exit($STATE_OK); +} + +sub print_help() { + print << "EOF"; +Check whether imap works via ssl and is able to connect its database. + +Options: +-h + Print detailed help screen +-u + User to log in as +-p + Password to log in +-H + Host to log in to + +EOF +} + diff --git a/flakes/private/monitoring/plugins/check_last_file_date b/flakes/private/monitoring/plugins/check_last_file_date new file mode 100755 index 0000000..f51a258 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_last_file_date @@ -0,0 +1,28 @@ +#!/bin/bash + +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 + +base_path=$1 +hours=$2 + +last_date=$(find $base_path -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort | tail -n 1) + +if [ -z "$last_date" ]; then + echo "UNKNOWN: Could not read folder" + exit $STATE_UNKNOWN +else + LC_ALL=C last_date=$(printf "%.*f" 0 $last_date) + LC_ALL=C age=$(( $(date "+%s") - $last_date)) + max_age=$(( $hours * 60 * 60 )) + min_date=$(date -d "$hours hours ago" "+%s") + if [ "$min_date" -lt "$last_date" ]; then + echo "OK: Last file $(date -d @$last_date) | age=${age}s;;$max_age;;" + exit $STATE_OK + else + echo "CRITICAL: Last file $(date -d @$last_date) | age=${age}s;;$max_age;;" + exit $STATE_CRITICAL + fi +fi diff --git a/flakes/private/monitoring/plugins/check_mem.sh b/flakes/private/monitoring/plugins/check_mem.sh new file mode 100755 index 0000000..3a29040 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_mem.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +if [ "$1" = "-w" ] && [ "$2" -gt "0" ] && [ "$3" = "-c" ] && [ "$4" -gt "0" ]; then + FreeM=`free -m -w` + memTotal_m=`echo "$FreeM" |grep Mem |awk '{print $2}'` + memUsed_m=`echo "$FreeM" |grep Mem |awk '{print $3}'` + memFree_m=`echo "$FreeM" |grep Mem |awk '{print $4}'` + memShared_m=`echo "$FreeM" |grep Mem |awk '{print $5}'` + 
memBuffer_m=`echo "$FreeM" |grep Mem |awk '{print $6}'` + memCache_m=`echo "$FreeM" |grep Mem |awk '{print $7}'` + memAvailable_m=`echo "$FreeM" |grep Mem |awk '{print $8}'` + memUsedPrc=`echo $((($memUsed_m-$memBuffer_m-$memCache_m)*100/$memTotal_m))||cut -d. -f1` + if [ "$memUsedPrc" -ge "$4" ]; then + echo "Memory: CRITICAL Total: $memTotal_m MB - Used/Buffer/Cache: $memUsed_m MB - $memUsedPrc% used!|TOTAL=$memTotal_m;;;; USED=$memUsed_m;;;; SHARED=$memShared_m;;;; CACHE=$memCache_m;;;; AVAILABLE=$memAvailable_m;;;; BUFFER=$memBuffer_m;;;;" + exit 2 + elif [ "$memUsedPrc" -ge "$2" ]; then + echo "Memory: WARNING Total: $memTotal_m MB - Used/Buffer/Cache: $memUsed_m MB - $memUsedPrc% used!|TOTAL=$memTotal_m;;;; USED=$memUsed_m;;;; SHARED=$memShared_m;;;; CACHE=$memCache_m;;;; AVAILABLE=$memAvailable_m;;;; BUFFER=$memBuffer_m;;;;" + exit 1 + else + echo "Memory: OK Total: $memTotal_m MB - Used/Buffer/Cache: $memUsed_m MB - $memUsedPrc% used|TOTAL=$memTotal_m;;;; USED=$memUsed_m;;;; SHARED=$memShared_m;;;; CACHE=$memCache_m;;;; AVAILABLE=$memAvailable_m;;;; BUFFER=$memBuffer_m;;;;" + exit 0 + fi +else # If inputs are not as expected, print help. 
+ sName="`echo $0|awk -F '/' '{print $NF}'`" + echo -e "\n\n\t\t### $sName Version 2.0###\n" + echo -e "# Usage:\t$sName -w -c " + echo -e "\t\t= warnlevel and critlevel is percentage value without %\n" + echo "# EXAMPLE:\t/usr/lib64/nagios/plugins/$sName -w 80 -c 90" + echo -e "\nCopyright (C) 2012 Lukasz Gogolin (lukasz.gogolin@gmail.com), improved by Nestor 2015\n\n" + exit +fi diff --git a/flakes/private/monitoring/plugins/check_mysql_replication b/flakes/private/monitoring/plugins/check_mysql_replication new file mode 100755 index 0000000..1ee5de1 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_mysql_replication @@ -0,0 +1,41 @@ +#!/bin/bash + +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 + +socket=$1 +config_file=$2 +info=$(mysql --defaults-file=${config_file} -S $socket -e "show slave status" --vertical) +exit_code=$? + +lag=$(echo "$info" | grep "\bSeconds_Behind_Master\b" | cut -d':' -f2 | sed -e "s/\s//g") + +IO_running=$(echo "$info" | grep "\bSlave_IO_Running\b" | cut -d':' -f2 | sed -e "s/\s//g") +SQL_running=$(echo "$info" | grep "\bSlave_SQL_Running\b" | cut -d':' -f2 | sed -e "s/\s//g") + +if [[ $exit_code -ne 0 ]]; then + echo "UNKNOWN - Impossible to run mysql command" + exit $STATE_UNKNOWN +elif [[ -z "$lag" ]]; then + echo "UNKNOWN - No replication found for mysql" + exit $STATE_UNKNOWN +elif [[ "$IO_running" != "Yes" || "$SQL_running" != "Yes" ]]; then + echo "UNKNOWN - Replication is not running" + exit $STATE_UNKNOWN +else + output="Replication lag for mysql is ${lag}s" + LC_ALL=C lag=$(printf "%.*f" 0 $lag) + + if [[ $lag -lt 5 ]]; then + echo "OK - $output | time=${lag}s;5;10;;" + exit $STATE_OK + elif [[ $lag -lt 10 ]]; then + echo "WARNING - $output | time=${lag}s;5;10;;" + exit $STATE_WARNING + else + echo "CRITICAL - $output | time=${lag}s;5;10;;" + exit $STATE_CRITICAL + fi +fi diff --git a/flakes/private/monitoring/plugins/check_openldap_replication 
b/flakes/private/monitoring/plugins/check_openldap_replication new file mode 100755 index 0000000..7136ad5 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_openldap_replication @@ -0,0 +1,54 @@ +#!/bin/bash + +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 + +distant_host="$1" +replication_dn="$2" +replication_pw="$3" +base="$4" +config="$5" + +to_date() { + i="$1" + i=$(echo "$i" | grep contextCSN | cut -d":" -f2 | sed -e "s/\s//g") + i=$(echo "$i" | cut -d"#" -f1) + i=$(echo "$i" | cut -d"." -f1) + echo "$i" +} + +# ldap +remote_ldap=$(ldapsearch -H $distant_host -D "$replication_dn" -y "$replication_pw" -b "$base" -s base -LLL contextCSN ) +exit_code_remote=$? +remote_ldap=$(to_date "$remote_ldap") + +# slapcat +local_ldap=$(slapcat -b "$base" -f "$config" -a "(entryDN=$base)") +exit_code_local=$? +local_ldap=$(to_date "$local_ldap") + +offset=$(($remote_ldap - $local_ldap)) + +if [[ $exit_code_remote -ne 0 || $exit_code_local -ne 0 ]]; then + echo "UNKNOWN - Impossible to run ldap command" + exit $STATE_UNKNOWN +elif [[ -z "$offset" ]]; then + echo "UNKNOWN - No replication found" + exit $STATE_UNKNOWN +else + output="Replication lag for openldap is ${offset}s" + LC_ALL=C lag=$(printf "%.*f" 0 $lag) + + if [[ $offset -lt 5 ]]; then + echo "OK - $output | time=${offset}s;5;10;;" + exit $STATE_OK + elif [[ $offset -lt 10 ]]; then + echo "WARNING - $output | time=${offset}s;5;10;;" + exit $STATE_WARNING + else + echo "CRITICAL - $output | time=${offset}s;5;10;;" + exit $STATE_CRITICAL + fi +fi diff --git a/flakes/private/monitoring/plugins/check_ovh_sms b/flakes/private/monitoring/plugins/check_ovh_sms new file mode 100755 index 0000000..caf279c --- /dev/null +++ b/flakes/private/monitoring/plugins/check_ovh_sms @@ -0,0 +1,25 @@ +#!/usr/bin/env python + +import sys +try: + import ovh + + [endpoint, application_key, application_secret, consumer_key, account] = sys.argv[1].split(",") + client = ovh.Client( + endpoint=endpoint, + 
application_key=application_key, + application_secret=application_secret, + consumer_key=consumer_key, + ) + + result = client.get('/sms/{}'.format(account))["creditsLeft"] + + if result < 20: + print("SMS OVH Critical - Not enough sms left ({})|SMS={};;;;".format(result, result)) + sys.exit(2) + else: + print("SMS OVH Ok - Enough sms left ({})|SMS={};;;;".format(result, result)) + sys.exit(0) +except Exception: + print("SMS OVH UNKNOWN - Error during script") + sys.exit(3) diff --git a/flakes/private/monitoring/plugins/check_postgres_database_count b/flakes/private/monitoring/plugins/check_postgres_database_count new file mode 100755 index 0000000..43bdd8c --- /dev/null +++ b/flakes/private/monitoring/plugins/check_postgres_database_count @@ -0,0 +1,32 @@ +#!/bin/bash + +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 + +host=$1 +port=$2 +min=$3 + +count=$(psql -h $host -p $port -A -q -c '\t' -c 'select count(datname) from pg_catalog.pg_database' postgres 2>&1) +exit_code=$? 
+ +if [[ $exit_code -ne 0 ]]; then + echo "UNKNOWN - Impossible to run psql command: $count" + exit $STATE_UNKNOWN +elif [[ -z "$count" ]]; then + echo "UNKNOWN - No database found" + exit $STATE_UNKNOWN +else + output="Database count is $count" + LC_ALL=C count=$(printf "%.*f" 0 $count) + + if [[ $count -gt $min ]]; then + echo "OK - $output | count=${count};$min;$min;0;" + exit $STATE_OK + else + echo "CRITICAL - $output | count=${count};$min;$min;0;" + exit $STATE_CRITICAL + fi +fi diff --git a/flakes/private/monitoring/plugins/check_postgres_replication b/flakes/private/monitoring/plugins/check_postgres_replication new file mode 100755 index 0000000..ff257a3 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_postgres_replication @@ -0,0 +1,35 @@ +#!/bin/bash + +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 + +user=$1 +host=$2 +port=$3 + +lag=$(psql -h $host -p $port -A -t -c "SELECT COALESCE(EXTRACT(EPOCH FROM replay_lag),0) FROM pg_stat_replication WHERE usename='$user'" 2>/dev/null) +exit_code=$? 
+ +if [[ $exit_code -ne 0 ]]; then + echo "UNKNOWN - Impossible to run psql command" + exit $STATE_UNKNOWN +elif [[ -z "$lag" ]]; then + echo "UNKNOWN - No replication found for $user" + exit $STATE_UNKNOWN +else + output="Replication lag for $user is ${lag}s" + LC_ALL=C lag=$(printf "%.*f" 0 $lag) + + if [[ $lag -lt 5 ]]; then + echo "OK - $output | time=${lag}s;5;10;0;" + exit $STATE_OK + elif [[ $lag -lt 10 ]]; then + echo "WARNING - $output | time=${lag}s;5;10;0;" + exit $STATE_WARNING + else + echo "CRITICAL - $output | time=${lag}s;5;10;0;" + exit $STATE_CRITICAL + fi +fi diff --git a/flakes/private/monitoring/plugins/check_redis_replication b/flakes/private/monitoring/plugins/check_redis_replication new file mode 100755 index 0000000..6dbe4c4 --- /dev/null +++ b/flakes/private/monitoring/plugins/check_redis_replication @@ -0,0 +1,38 @@ +#!/bin/bash + +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 + +socket=$1 + +info=$(redis-cli -s $socket info replication); redis_status=$? # keep the redis-cli status; later assignments overwrite $? +lag=$(echo "$info" | grep master_last_io_seconds_ago | cut -d":" -f2 | sed -e "s/\s//g") +slave_offset=$(echo "$info" | grep slave_repl_offset | cut -d":" -f2 | sed -e "s/\s//g") +master_offset=$(echo "$info" | grep master_repl_offset | cut -d":" -f2 | sed -e "s/\s//g") +offset=$(($master_offset - $slave_offset)) + +exit_code=$redis_status 
+ +if [[ $exit_code -ne 0 ]]; then + echo "UNKNOWN - Impossible to run redis command" + exit $STATE_UNKNOWN +elif [[ -z "$lag" ]]; then + echo "UNKNOWN - No replication found" + exit $STATE_UNKNOWN +else + output="Replication lag for redis is ${lag}s and offset is ${offset}" + LC_ALL=C lag=$(printf "%.*f" 0 $lag) + + if [[ $lag -lt 5 && $offset -lt 5 ]]; then + echo "OK - $output | time=${lag}s;5;10;0; offset=${offset};5;10;0;" + exit $STATE_OK + elif [[ $lag -lt 10 && $offset -lt 10 ]]; then + echo "WARNING - $output | time=${lag}s;5;10;0; offset=${offset};5;10;0;" + exit $STATE_WARNING + else + echo "CRITICAL - $output | time=${lag}s;5;10;0; offset=${offset};5;10;0;" + exit $STATE_CRITICAL + fi +fi diff --git a/flakes/private/monitoring/plugins/check_zfs_snapshot b/flakes/private/monitoring/plugins/check_zfs_snapshot new file mode 100755 index 0000000..56f8c4f --- /dev/null +++ b/flakes/private/monitoring/plugins/check_zfs_snapshot @@ -0,0 +1,325 @@ +#! /bin/sh + +OS=$(uname) + +# MIT License +# +# Copyright (c) 2016 Josef Friedrich +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +######################################################################## +# Date functions +######################################################################## + +# This date function must be placed on the top of this file because +# they are used in some global variables. + +# to_year ### + +## +# Get the four digit year integer from now. +# +# Return: +# The current 4 digit year. +## +_now_to_year() { + date +%Y +} + +## +# Convert a date in the format YYYY-MM-DD to a four digit year integer. +# +# Parameters: +# a date in the format YYYY-MM-DD +# +# Return: +# four digit year integer +## +_date_to_year() { + local OPTIONS + if [ "$OS" = 'Linux' ]; then + OPTIONS="--date $1" + # FreeBSD, Darwin + else + OPTIONS="-j -f %Y-%m-%d $1" + fi + date $OPTIONS +%Y +} + +# to_datetime ### + +## +# Convert a UNIX timestamp to a datetime string. +# +# Parameters: +# UNIX timestamp +# +# Return: +# %Y-%m-%d.%H:%M:%S +## +_timestamp_to_datetime() { + local OPTIONS + if [ "$OS" = 'Linux' ]; then + OPTIONS="--date @$1" + # FreeBSD, Darwin + else + OPTIONS="-j -f %s $1" + fi + date $OPTIONS +%Y-%m-%d.%H:%M:%S +} + +# to_timestamp ### + +## +# Get the current UNIX timestamp. +# +# Return: +# %current UNIX timestamp +## +_now_to_timestamp() { + date +%s +} + +PROJECT_PAGES='https://github.com/Josef-Friedrich/check_zfs_snapshot +https://exchange.icinga.com/joseffriedrich/check_zfs_snapshot +https://exchange.nagios.org/directory/Plugins/System-Metrics/File-System/check_zfs_snapshot/details' + +VERSION=1.2 +FIRST_RELEASE=2016-09-08 +SHORT_DESCRIPTION="Monitoring plugin to check how long ago the last \ +snapshot of a ZFS dataset was created." 
+USAGE="check_zfs_snapshot v$VERSION
+Copyright (c) $(_date_to_year $FIRST_RELEASE)-$(_now_to_year) \
+Josef Friedrich
+
+$SHORT_DESCRIPTION
+
+
+Usage: check_zfs_snapshot
+
+Options:
+ -c, --critical=OPT_CRITICAL
+    Interval in seconds for critical state.
+ -d, --dataset=OPT_DATASET
+    The ZFS dataset to check.
+ -h, --help
+    Show this help.
+ -s, --short-description
+    Show a short description of the command.
+ -v, --version
+    Show the version number.
+ -w, --warning=OPT_WARNING
+    Interval in seconds for warning state. Must be lower than -c
+
+Performance data:
+ - last_ago
+    Time interval in seconds for last snapshot.
+ - warning
+    Interval in seconds.
+ - critical
+    Interval in seconds.
+ - snapshot_count
+    How many snapshot exists in the given dataset and all child
+    datasets exists.
+"
+
+# Exit codes
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+# Print the creation time (UNIX timestamp) of the newest snapshot of the
+# dataset $1 and its children; prints 0 when zfs reports no snapshot.
+_get_last_snapshot() {
+	zfs get creation -Hpr -t snapshot "$1" | \
+		awk 'BEGIN {max = 0} {if ($3>max) max=$3} END {print max}'
+}
+
+# Parse the command line into OPT_CRITICAL, OPT_DATASET and OPT_WARNING.
+# Long options arrive through the "-" pseudo option of getopts (-:).
+_getopts() {
+	while getopts ':c:d:hsvw:-:' OPT ; do
+		case $OPT in
+			c)
+				OPT_CRITICAL=$OPTARG
+				;;
+
+			d)
+				OPT_DATASET="$OPTARG"
+				;;
+
+			h)
+				echo "$USAGE"
+				exit 0
+				;;
+
+			s)
+				echo "$SHORT_DESCRIPTION"
+				exit 0
+				;;
+
+			v)
+				echo "$VERSION"
+				exit 0
+				;;
+
+			w)
+				OPT_WARNING=$OPTARG
+				;;
+
+			\?)
+				echo "Invalid option “-$OPTARG”!" >&2
+				exit 2
+				;;
+
+			:)
+				echo "Option “-$OPTARG” requires an argument!" >&2
+				exit 3
+				;;
+
+			-)
+				LONG_OPTARG="${OPTARG#*=}"
+
+				case $OPTARG in
+					critical=?*)
+						OPT_CRITICAL=$LONG_OPTARG
+						;;
+
+					dataset=?*)
+						OPT_DATASET="$LONG_OPTARG"
+						;;
+
+					help)
+						echo "$USAGE"
+						exit 0
+						;;
+
+					short-description)
+						echo "$SHORT_DESCRIPTION"
+						exit 0
+						;;
+
+					version)
+						echo "$VERSION"
+						exit 0
+						;;
+
+					warning=?*)
+						OPT_WARNING=$LONG_OPTARG
+						;;
+
+					critical*|dataset*|warning*)
+						echo "Option “--$OPTARG” requires an argument!" >&2
+						exit 3
+						;;
+
+					help*|short-description*|version*)
+						echo "No argument allowed for the option “--$OPTARG”!" >&2
+						exit 4
+						;;
+
+					'') # "--" terminates argument processing
+						break
+						;;
+
+					*)
+						echo "Invalid option “--$OPTARG”!" >&2
+						exit 2
+						;;
+				esac
+				;;
+		esac
+	done
+}
+
+_snapshot_count() {
+	# Count the snapshots of dataset $1 and its children (-r); selecting by
+	# dataset keeps similarly named datasets out of the count. $(...) is
+	# unquoted on purpose: FreeBSD wc pads the number with whitespace.
+	echo $(zfs list -H -o name -t snapshot -r "$1" | wc -l)
+}
+
+# Print the performance data part of the plugin output; reads the
+# globals DIFF, OPT_WARNING, OPT_CRITICAL and OPT_DATASET.
+_performance_data() {
+	echo "| \
+last_ago=${DIFF}s;$OPT_WARNING;$OPT_CRITICAL;0 \
+count=$(_snapshot_count "$OPT_DATASET");;;0\
+"
+}
+
+## This SEPARATOR is required for test purposes. Please don’t remove! ##
+
+_getopts "$@"
+
+if [ -z "$OPT_WARNING" ]; then
+	# 1 day
+	OPT_WARNING=86400
+fi
+
+if [ -z "$OPT_CRITICAL" ]; then
+	# 3 day
+	OPT_CRITICAL=259200
+fi
+
+if [ -z "$OPT_DATASET" ]; then
+	echo "Dataset has to be set! Use option -d " >&2
+	echo "$USAGE" >&2
+	exit $STATE_UNKNOWN
+fi
+
+if ! zfs list "$OPT_DATASET" > /dev/null 2>&1; then
+	echo "'$OPT_DATASET' is no ZFS dataset!" >&2
+	echo "$USAGE" >&2
+	exit $STATE_UNKNOWN
+fi
+
+NOW=$(_now_to_timestamp)
+
+CREATION_DATE=$(_get_last_snapshot "$OPT_DATASET")
+
+DIFF=$((NOW - CREATION_DATE))
+
+if [ "$OPT_WARNING" -gt "$OPT_CRITICAL" ]; then
+	echo '-w OPT_WARNING must be smaller than -c OPT_CRITICAL'
+	echo "$USAGE" >&2
+	exit $STATE_UNKNOWN
+fi
+
+RETURN=$STATE_UNKNOWN
+
+if [ "$DIFF" -gt "$OPT_CRITICAL" ]; then
+	RETURN=$STATE_CRITICAL
+	MESSAGE="CRITICAL:"
+elif [ "$DIFF" -gt "$OPT_WARNING" ]; then
+	RETURN=$STATE_WARNING
+	MESSAGE="WARNING:"
+else
+	RETURN=$STATE_OK
+	MESSAGE="OK:"
+fi
+
+DATE="$(_timestamp_to_datetime "$CREATION_DATE")"
+
+echo "$MESSAGE Last snapshot for dataset '$OPT_DATASET' was created on $DATE $(_performance_data)"
+
+exit $RETURN
diff --git a/flakes/private/monitoring/plugins/notify_by_apprise b/flakes/private/monitoring/plugins/notify_by_apprise
new file mode 100755
index 0000000..82bc5a3
--- /dev/null
+++ b/flakes/private/monitoring/plugins/notify_by_apprise
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+# Send a Naemon notification through apprise.
+#
+# $1 = "host" for a host notification, anything else for a service one
+# $2 = apprise URLs; the {username} and {image_url} placeholders are
+#      replaced by the values below
+#
+# The notification data is read from the environment: NOTIFICATIONTYPE,
+# HOST, HOSTSTATE, HOSTOUTPUT, SERVICEDESC, SERVICESTATE, SERVICEOUTPUT.
+
+APPRISE_USERNAME="Naemon"
+APPRISE_USERICON="https://assets.immae.eu/monitoring.png"
+APPRISE_URLS=$(echo "$2" | sed -e "s/{username}/$APPRISE_USERNAME/g" -e "s@{image_url}@$APPRISE_USERICON@g")
+
+if [ "$SERVICESTATE" = "CRITICAL" ]; then
+  ICON="❗"
+elif [ "$SERVICESTATE" = "WARNING" ]; then
+  ICON="⚠️"
+elif [ "$SERVICESTATE" = "OK" ]; then
+  ICON="✅"
+elif [ "$SERVICESTATE" = "UNKNOWN" ]; then
+  ICON="❓"
+elif [ "$HOSTSTATE" = "UP" ]; then
+  ICON="✅"
+elif [ "$HOSTSTATE" = "DOWN" ]; then
+  ICON="❗"
+elif [ "$HOSTSTATE" = "UNKNOWN" ]; then
+  ICON="❓"
+elif [ "$HOSTSTATE" = "UNREACHABLE" ]; then
+  ICON="❓"
+else
+  ICON="◻"
+fi
+
+# NOTE(review): $APPRISE_URLS is unquoted, so whitespace-separated URLs
+# become separate arguments - presumably intended; confirm.
+if [ "$1" = "host" ]; then
+  apprise --title "${ICON} ${NOTIFICATIONTYPE} ${HOST} is ${HOSTSTATE}" --body "$HOSTOUTPUT" $APPRISE_URLS
+else
+  apprise --title "${ICON} ${NOTIFICATIONTYPE} ${SERVICEDESC} on ${HOST} is ${SERVICESTATE}" --body "$SERVICEOUTPUT" $APPRISE_URLS
+fi
diff --git a/flakes/private/monitoring/plugins/notify_by_email
b/flakes/private/monitoring/plugins/notify_by_email
new file mode 100755
index 0000000..959db26
--- /dev/null
+++ b/flakes/private/monitoring/plugins/notify_by_email
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+# $1 = service/host
+
+# $2 = type (PROBLEM RECOVERY ACKNOWLEDGEMENT FLAPPINGSTART FLAPPINGSTOP FLAPPINGDISABLED DOWNTIMESTART DOWNTIMESTOP DOWNTIMECANCELLED)
+# http://www.naemon.org/documentation/usersguide/macrolist.html#notificationtype
+
+# $3 = host alias
+
+# $4 = date (YYYY-MM-DDTHH:MM:SS)
+
+# $5 = E-mail
+
+NOTIFICATION_TYPE="$2"
+HOST_ALIAS="$3"
+DATE="$4"
+CONTACT="$5"
+
+message=""
+
+if [ "$1" = "host" ]; then
+  message=$(printf "%b" "***** Naemon *****\n\nNotification Type: $NOTIFICATION_TYPE\n\nHost: $HOST_ALIAS\nState: $HOSTSTATE\nInfo: $HOSTOUTPUT\n\nDate/Time: $DATE\n")
+  subject="** $NOTIFICATION_TYPE Host Alert: $HOST_ALIAS is $HOSTSTATE **"
+else
+  message=$(printf "%b" "***** Naemon *****\n\nNotification Type: $NOTIFICATION_TYPE\n\nService: $SERVICEDESC\nHost: $HOST_ALIAS\nState: $SERVICESTATE\n\nDate/Time: $DATE\n\nAdditional Info:\n\n$SERVICEOUTPUT\n")
+  subject="** $NOTIFICATION_TYPE Service Alert: $HOST_ALIAS/$SERVICEDESC is $SERVICESTATE **"
+fi
+
+echo "$message" | MAILRC=/dev/null mail -r "$ADMINEMAIL" -n -s "$subject" "$CONTACT"
diff --git a/flakes/private/monitoring/plugins/notify_by_slack b/flakes/private/monitoring/plugins/notify_by_slack
new file mode 100755
index 0000000..1b16a0d
--- /dev/null
+++ b/flakes/private/monitoring/plugins/notify_by_slack
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+# Post a Naemon service notification to the Slack URL given in $2.
+# $1 = Slack channel, $2 = URL to post to; the service data is read from
+# the environment (HOST, SERVICEDESC, SERVICESTATE, SERVICEOUTPUT).
+
+SLACK_CHANNEL="$1"
+SLACK_USERNAME="Naemon"
+SLACK_URL="$2"
+SLACK_USERICON="https://assets.immae.eu/monitoring.png"
+
+if [ "$SERVICESTATE" = "CRITICAL" ]; then
+  ICON=":exclamation:"
+  COLOR="#DA0505"
+elif [ "$SERVICESTATE" = "WARNING" ]; then
+  ICON=":warning:"
+  COLOR="#F1E903"
+elif [ "$SERVICESTATE" = "OK" ]; then
+  ICON=":white_check_mark:"
+  COLOR="#36a64f"
+elif [ "$SERVICESTATE" = "UNKNOWN" ]; then
+  ICON=":question:"
+  COLOR="#000000"
+else
+  ICON=":white_medium_square:"
+  COLOR="#ffffff"
+fi
+
+payload=$(echo "{}" | jq -r \
+  --arg "icon_url" "$SLACK_USERICON" \
+  --arg "channel" "$SLACK_CHANNEL" \
+  --arg "username" "$SLACK_USERNAME" \
+  --arg "text" "${ICON} ${SERVICEDESC} on ${HOST} is ${SERVICESTATE}" \
+  --arg "color" "$COLOR" \
+  --arg "host" "$HOST" \
+  --arg "desc" "$SERVICEDESC" \
+  --arg "state" "$SERVICESTATE" \
+  --arg "output" "$SERVICEOUTPUT" \
+  '.icon_url = $icon_url |
+  .channel = $channel |
+  .username = $username |
+  .text = $text |
+  .attachments = [{fallback:"", color:$color,fields: [{},{},{},{}]}] |
+  .attachments[0].fields[0] = {title:"Host",value:$host,short:true} |
+  .attachments[0].fields[1] = {title:"Service description",value:$desc,short:true} |
+  .attachments[0].fields[2] = {title:"Status",value:$state,short:true} |
+  .attachments[0].fields[3] = {title:"Message",value:$output,short:false}
+  ')
+
+# The JSON has to be URL-encoded: a "&", "+" or "%" in the plugin output
+# would otherwise corrupt the form-encoded payload parameter.
+curl -X POST --data-urlencode "payload=$payload" "$SLACK_URL"
diff --git a/flakes/private/monitoring/plugins/send_nrdp.sh b/flakes/private/monitoring/plugins/send_nrdp.sh
new file mode 100755
index 0000000..c83c8cb
--- /dev/null
+++ b/flakes/private/monitoring/plugins/send_nrdp.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+TEMPLATE='{
+  "cmd": "submitcheck",
+  "token": $token,
+  "checkresult": [{
+    "hostname": $hostname,
+    "state": $state,
+    "output": $output,
+    "type": $type,
+    "servicename": $servicename
+  }]
+}'
+
+while getopts "u:t:H:s:S:o:" option
+do
+  case $option in
+    u) url=$OPTARG ;;
+    t) token=$OPTARG ;;
+    H) hostname=$OPTARG ;;
+    s) servicename=$OPTARG ;;
+    S) state=$OPTARG ;;
+    o) output=$OPTARG ;;
+  esac
+done
+
+if [ -n "$servicename" ]; then
+  checktype="service"
+else
+  checktype="host"
+fi
+
+payload=$(jq -n \
+  --arg type "$checktype" \
+  --arg hostname "$hostname" \
+  --arg servicename "$servicename" \
+  --arg output "$output" \
+  --arg token "$token" \
+  --arg state "$state" \
+  "$TEMPLATE")
+
+rslt=$(curl -f --silent --insecure -d "$payload" -H "Content-Type: application/json" "$url")
+ret=$?
+
+if [ $ret != 0 ];then
+  echo "ERROR: could not connect to NRDP server at $url"
+  exit 1
+fi
+
+status=$(echo "$rslt" | jq -r .status)
+message=$(echo "$rslt" | jq -r .message)
+
+if [ "$status" != "ok" ];then
+  echo "ERROR: The NRDP Server said $message"
+  exit 2
+fi
+echo "Sent 1 checks to $url"
diff --git a/flakes/private/monitoring/send_mails b/flakes/private/monitoring/send_mails
new file mode 100755
index 0000000..105c505
--- /dev/null
+++ b/flakes/private/monitoring/send_mails
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+# Send a test mail to every address given after the first two arguments,
+# then list the files of CHECK_DIR with their modification time.
+# $1 = CHECK_DIR, $2 = return path (mail -r), $3... = recipients
+
+CHECK_DIR=$1
+shift
+RETURN_PATH=$1
+shift
+
+for mail in "$@"; do
+  echo "Test Mail" | MAILRC=/dev/null mail -n -r "$RETURN_PATH" -s "TestMailImmae " "$mail"
+done
+
+if [ -d "$CHECK_DIR" ]; then
+  cd "$CHECK_DIR" || exit 1
+  stat -c '%n;%y' *
+fi
diff --git a/flakes/private/monitoring/to_objects.nix b/flakes/private/monitoring/to_objects.nix
new file mode 100644
index 0000000..57a71ad
--- /dev/null
+++ b/flakes/private/monitoring/to_objects.nix
@@ -0,0 +1,81 @@
+{ lib }:
+  with lib.attrsets;
+  with lib.strings;
+  with lib.lists;
+  with lib.trivial;
+let
+  # Right-pad str with spaces up to width characters.
+  pad = width: str: let
+      padWidth = width - stringLength str;
+      padding = concatStrings (genList (const " ") padWidth);
+    in str + optionalString (padWidth > 0) padding;
+  # check_command lists are joined with "!", anything else is stringified.
+  toStr = k: v:
+    if k == "check_command" && builtins.isList v
+    then builtins.concatStringsSep "!" v
+    else builtins.toString v;
+
+  toService = service: ''
+    define service {
+    ${builtins.concatStringsSep "\n" (mapAttrsToList (k: v:
+      "  ${pad 30 k}   ${toStr k v}"
+    ) (filterAttrs (k: v: ! hasPrefix "__passive_" k) service))}
+    }
+    '';
+  toServices = services: builtins.concatStringsSep "\n" (map toService services);
+
+  toCommand = k: v: ''
+    define command {
+      ${pad 30 "command_name"}   ${k}
+      ${pad 30 "command_line"}   ${v}
+    }
+    '';
+  toCommands = a: builtins.concatStringsSep "\n" (mapAttrsToList toCommand a);
+
+  # Values go through builtins.toString so that integers and lists can be
+  # used, as in toService and toTemplate.
+  toOther = keyname: k: v: ''
+    define ${keyname} {
+      ${pad 30 "${keyname}_name"}   ${k}
+    ${builtins.concatStringsSep "\n" (mapAttrsToList (kk: vv:
+      "  ${pad 30 kk}   ${builtins.toString vv}"
+    ) v)}
+    }
+    '';
+  toOtherNoName = keyname: v: ''
+    define ${keyname} {
+    ${builtins.concatStringsSep "\n" (mapAttrsToList (kk: vv:
+      "  ${pad 30 kk}   ${builtins.toString vv}"
+    ) v)}
+    }
+    '';
+  toOthers = keyname: a: builtins.concatStringsSep "\n" (mapAttrsToList (toOther keyname) a);
+  toOthersArray = keyname: a: builtins.concatStringsSep "\n" (map (toOtherNoName keyname) a);
+
+  toTemplate = keyname: k: v: ''
+    define ${keyname} {
+      ${pad 30 "name"}   ${k}
+      ${pad 30 "register"}   0
+    ${builtins.concatStringsSep "\n" (mapAttrsToList (kk: vv:
+      "  ${pad 30 kk}   ${builtins.toString vv}"
+    ) v)}
+    }
+    '';
+  toTemplates' = keyname: a: builtins.concatStringsSep "\n" (mapAttrsToList (toTemplate keyname) a);
+  toTemplates = v: builtins.concatStringsSep "\n" (mapAttrsToList toTemplates' v);
+
+  toObjects' = keyname: v:
+    if keyname == "service"
+      then toServices v
+    else if keyname == "command"
+      then toCommands v
+    else if keyname == "templates"
+      then toTemplates v
+    else if builtins.elem keyname ["hostgroup" "host" "contactgroup" "contact" "timeperiod" "servicegroup"]
+      then toOthers keyname v
+    else if builtins.elem keyname ["servicedependency" "hostdependency"]
+      then toOthersArray keyname v
+    else builtins.trace ("Warning: unknown object type " + keyname) "";
+  toObjects = v: builtins.concatStringsSep "\n" (mapAttrsToList toObjects' v);
+in
+  toObjects
-- 
cgit v1.2.3