From: Ismaël Bouya
Date: Wed, 17 Jul 2019 09:33:57 +0000 (+0200)
Subject: Add monitoring modules via naemon
X-Git-Url: https://git.immae.eu/?p=perso%2FImmae%2FConfig%2FNix.git;a=commitdiff_plain;h=3bc32d9ebd9b0900eeef756122ad28f8857f8bfe

Add monitoring modules via naemon
---

diff --git a/modules/default.nix b/modules/default.nix
index 5346956..9e9c411 100644
--- a/modules/default.nix
+++ b/modules/default.nix
@@ -13,6 +13,8 @@
   opendmarc = ./opendmarc.nix;
   openarc = ./openarc.nix;
 
+  naemon = ./naemon;
+
   php-application = ./websites/php-application.nix;
   websites = ./websites;
 } // (if builtins.pathExists ./private then import ./private else {})
diff --git a/modules/myids.nix b/modules/myids.nix
index e949ca7..ac9fd65 100644
--- a/modules/myids.nix
+++ b/modules/myids.nix
@@ -14,6 +14,7 @@
     mastodon = 399;
   };
   ids.gids = {
+    nagios = 11; # commented in the ids file
     vhost = 390;
     openarc = 391;
     opendmarc = 392;
diff --git a/modules/naemon/default.nix b/modules/naemon/default.nix
new file mode 100644
index 0000000..38e99a9
--- /dev/null
+++ b/modules/naemon/default.nix
@@ -0,0 +1,205 @@
+# NixOS module that runs the Naemon monitoring daemon as a systemd service.
+# The main configuration is generated from ./naemon.cfg; the resource file is
+# deployed through the secrets.keys option.
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  cfg = config.services.naemon;
+
+  # Main configuration file: ./naemon.cfg with its @logDir@, @varDir@,
+  # @runDir@, @cacheDir@, @objectsFile@ and @resourceFile@ placeholders
+  # substituted, followed by the user supplied extraConfig.
+  naemonConfig = pkgs.runCommand "naemon-config" {
+    objectsFile = pkgs.writeText "naemon_objects.cfg" cfg.objectDefs;
+    resourceFile = config.secrets.fullPaths."naemon/resources.cfg";
+    extraConfig = pkgs.writeText "extra.cfg" cfg.extraConfig;
+    inherit (cfg) logDir varDir runDir cacheDir;
+  } ''
+    substituteAll ${./naemon.cfg} $out
+    cat $extraConfig >> $out
+  '';
+in
+{
+  options = {
+    services.naemon = {
+      enable = mkOption {
+        type = types.bool;
+        default = false;
+        description = "
+          Whether to use Naemon to monitor
+          your system or network.
+        ";
+      };
+
+      objectDefs = mkOption {
+        type = types.lines;
+        default = "";
+        description = "
+          A list of Naemon object configuration that must define
+          the hosts, host groups, services and contacts for the
+          network that you want Naemon to monitor.
+        ";
+      };
+
+      extraResource = mkOption {
+        type = types.lines;
+        default = "";
+        example = ''
+          # Sets $USER2$ to be the path to event handlers
+          #$USER2$=/usr/lib/monitoring-plugins/eventhandlers
+
+          # Store some usernames and passwords (hidden from the CGIs)
+          #$USER3$=someuser
+          #$USER4$=somepassword
+        '';
+        description = "
+          Lines to add to the resource file
+          # You can define $USERx$ macros in this file, which can in turn be used
+          # in command definitions in your host config file(s). $USERx$ macros are
+          # useful for storing sensitive information such as usernames, passwords,
+          # etc. They are also handy for specifying the path to plugins and
+          # event handlers - if you decide to move the plugins or event handlers to
+          # a different directory in the future, you can just update one or two
+          # $USERx$ macros, instead of modifying a lot of command definitions.
+          #
+          # Naemon supports up to 256 $USERx$ macros ($USER1$ through $USER256$)
+          #
+          # Resource files may also be used to store configuration directives for
+          # external data sources like MySQL...
+          #
+        ";
+      };
+
+      extraConfig = mkOption {
+        type = types.lines;
+        default = "";
+        description = "
+          Extra config to append to main config
+        ";
+      };
+
+      user = mkOption {
+        type = types.str;
+        default = "naemon";
+        description = "User for naemon";
+      };
+
+      group = mkOption {
+        type = types.str;
+        default = "naemon";
+        description = "Group for naemon";
+      };
+
+      varDir = mkOption {
+        type = types.path;
+        default = "/var/lib/naemon";
+        description = "The directory where naemon stores its data";
+      };
+
+      cacheDir = mkOption {
+        type = types.path;
+        default = "/var/cache/naemon";
+        description = "The directory where naemon stores its cache";
+      };
+
+      runDir = mkOption {
+        type = types.path;
+        default = "/run/naemon";
+        description = "The directory where naemon stores its runtime files";
+      };
+
+      logDir = mkOption {
+        type = types.path;
+        default = "/var/log/naemon";
+        description = "The directory where naemon stores its log files";
+      };
+
+      package = mkOption {
+        type = types.package;
+        default = pkgs.naemon.override {
+          inherit (cfg) varDir cacheDir logDir runDir user group;
+        };
+        description = ''
+          Naemon package to use
+        '';
+      };
+    };
+  };
+
+
+  config = mkIf cfg.enable {
+    # The resource file is installed through secrets.keys, owned by cfg.user
+    # with mode 0400, since it may hold credentials (see extraResource).
+    secrets.keys = [
+      {
+        dest = "naemon/resources.cfg";
+        user = cfg.user;
+        group = cfg.group;
+        permissions = "0400";
+        text = ''
+          $USER1$=${pkgs.monitoring-plugins}/libexec
+          ${cfg.extraResource}
+        '';
+      }
+    ];
+
+    # Only the default account is declared here; a custom user or group has
+    # to be declared by whoever overrides the corresponding option.
+    users.users = optionalAttrs (cfg.user == "naemon") {
+      naemon = {
+        group = cfg.group;
+        uid = config.ids.uids.nagios;
+        extraGroups = [ "keys" ];
+      };
+    };
+    # Keyed on cfg.group rather than cfg.user, so that the default group still
+    # exists when only the user is overridden.
+    users.groups = optionalAttrs (cfg.group == "naemon") {
+      naemon = {
+        gid = config.ids.gids.nagios;
+      };
+    };
+
+    # NOTE(review): presumably restarts naemon when the secret changes; the
+    # filesWatcher module is not visible from here.
+    services.filesWatcher.naemon = {
+      paths = [ config.secrets.fullPaths."naemon/resources.cfg" ];
+    };
+    systemd.services.naemon = {
+      description = "Naemon monitoring daemon";
+      path     = [ cfg.package pkgs.monitoring-plugins ];
+      wantedBy = [ "multi-user.target" ];
+      after    = [ "network.target" ];
+
+      # -p writes the precached object file described in naemon.cfg.
+      preStart = "${cfg.package}/bin/naemon -vp ${naemonConfig}";
+      script = "${cfg.package}/bin/naemon --daemon ${naemonConfig}";
+      reload = "${pkgs.utillinux}/bin/kill -HUP $MAINPID";
+      serviceConfig = {
+        # --daemon makes naemon fork into the background, so systemd must
+        # track the daemonised process through PIDFile instead of treating the
+        # exit of the start script as the end of the service.
+        Type = "forking";
+        User = cfg.user;
+        Restart = "always";
+        RestartSec = 2;
+        StandardOutput = "journal";
+        StandardError = "inherit";
+        PIDFile = "${cfg.runDir}/naemon.pid";
+        # The *Directory settings take names relative to /var/log, /var/cache,
+        # /var/lib and /run, hence the asserted prefixes being stripped.
+        LogsDirectory = assert lib.strings.hasPrefix "/var/log/" cfg.logDir;
+          lib.strings.removePrefix "/var/log/" cfg.logDir;
+        CacheDirectory = assert lib.strings.hasPrefix "/var/cache/" cfg.cacheDir;
+          let unprefixed = lib.strings.removePrefix "/var/cache/" cfg.cacheDir;
+          in [ unprefixed "${unprefixed}/checkresults" ];
+        StateDirectory = assert lib.strings.hasPrefix "/var/lib/" cfg.varDir;
+          lib.strings.removePrefix "/var/lib/" cfg.varDir;
+        RuntimeDirectory = assert lib.strings.hasPrefix "/run/" cfg.runDir;
+          lib.strings.removePrefix "/run/" cfg.runDir;
+      };
+    };
+  };
+}
diff --git a/modules/naemon/naemon.cfg b/modules/naemon/naemon.cfg
new file mode 100644
index 0000000..792e03b
--- /dev/null
+++ b/modules/naemon/naemon.cfg
@@ -0,0 +1,1059 @@
+##############################################################################
+#
+# naemon.cfg - Sample Main Config File for Naemon 1.0.10.gc2a87305.dirty.20190703.source
+#
+# Read the documentation for more information on this configuration
+# file. I've provided some comments here, but things may not be so
+# clear without further explanation.
+#
+#
+##############################################################################
+
+
+# LOG FILE
+# This is the main log file where service and host events are logged
+# for historical purposes. This should be the first option specified
+# in the config file!!!
+
+log_file=@logDir@/naemon.log
+
+
+
+# OBJECT CONFIGURATION FILE(S)
+# These are the object configuration files in which you define hosts,
+# host groups, contacts, contact groups, services, etc.
+# You can split your object definitions across several config files +# if you wish (as shown below), or keep them all in a single config file. + +# You can specify individual object config files as shown below: +#cfg_file=/etc/naemon/objects/commands.cfg +#cfg_file=/etc/naemon/objects/contacts.cfg +#cfg_file=/etc/naemon/objects/timeperiods.cfg +#cfg_file=/etc/naemon/objects/templates.cfg +cfg_file=@objectsFile@ + + +# You can also tell naemon to process all config files (with a .cfg +# extension) in a particular directory by using the cfg_dir +# directive as shown below: +#cfg_dir=/etc/naemon/conf.d + + + + +# OBJECT CACHE FILE +# This option determines where object definitions are cached when +# naemon starts/restarts. The CGIs read object definitions from +# this cache file (rather than looking at the object config files +# directly) in order to prevent inconsistencies that can occur +# when the config files are modified after naemon starts. + +object_cache_file=@varDir@/objects.cache + + + +# PRE-CACHED OBJECT FILE +# This option determines the location of the precached object file. +# If you run naemon with the -p command line option, it will preprocess +# your object configuration file(s) and write the cached config to this +# file. You can then start naemon with the -u option to have it read +# object definitions from this precached file, rather than the standard +# object configuration files (see the cfg_file and cfg_dir options above). +# Using a precached object file can speed up the time needed to (re)start +# the naemon process if you've got a large and/or complex configuration. +# Read the documentation section on optimizing naemon to find out more +# about how this feature works. + +precached_object_file=@varDir@/objects.precache + + + +# RESOURCE FILE +# This is an optional resource file that contains $USERx$ macro +# definitions. Multiple resource files can be specified by using +# multiple resource_file definitions. 
The CGIs will not attempt to +# read the contents of resource files, so information that is +# considered to be sensitive (usernames, passwords, etc) can be +# defined as macros in this file and restrictive permissions (600) +# can be placed on this file. + +resource_file=@resourceFile@ + + + +# STATUS FILE +# This is where the current status of all monitored services and +# hosts is stored. Its contents are read and processed by the CGIs. +# The contents of the status file are deleted every time naemon +# restarts. + +status_file=@varDir@/status.dat + + + +# STATUS FILE UPDATE INTERVAL +# This option determines the frequency (in seconds) that +# naemon will periodically dump program, host, and +# service status data. Set it to 0 to disable updates. + +status_update_interval=10 + + + +# EXTERNAL COMMAND OPTION +# This option allows you to specify whether or not Naemon should check +# for external commands (in the command file defined below). By default +# Naemon will *not* check for external commands, just to be on the +# cautious side. If you want to be able to use the CGI command interface +# you will have to enable this. +# Values: 0 = disable commands, 1 = enable commands + +check_external_commands=1 + + + +# EXTERNAL COMMAND FILE +# This is the file that Naemon checks for external command requests. +# It is also where the command CGI will write commands that are submitted +# by users, so it must be writeable by the user that the web server +# is running as (usually 'nobody'). Permissions should be set at the +# directory level instead of on the file, as the file is deleted every +# time its contents are processed. + +command_file=@runDir@/naemon.cmd + + + +# QUERY HANDLER INTERFACE +# This is the socket that is created for the Query Handler interface + +query_socket=@runDir@/naemon.qh + + + +# LOCK FILE +# This is the lockfile that Naemon will use to store its PID number +# in when it is running in daemon mode. 
+ +lock_file=@runDir@/naemon.pid + + + +# TEMP FILE +# This is a temporary file that is used as scratch space when Naemon +# updates the status log, cleans the comment file, etc. This file +# is created, used, and deleted throughout the time that Naemon is +# running. + +temp_file=@cacheDir@/naemon.tmp + + + +# TEMP PATH +# This is the path where Naemon can create temp files for service and +# host check results, etc. + +temp_path=@cacheDir@ + + + +# EVENT BROKER OPTIONS +# Controls what (if any) data gets sent to the event broker. +# Values: 0 = Broker nothing +# -1 = Broker everything +# <other> = See documentation + +event_broker_options=-1 + + + +# EVENT BROKER MODULE(S) +# This directive is used to specify an event broker module that should +# be loaded by Naemon at startup. Use multiple directives if you want +# to load more than one module. Arguments that should be passed to +# the module at startup are separated from the module path by a space. +# +# Example: +# +# broker_module=<modulepath> [moduleargs] + +#broker_module=/usr/lib/naemon/naemon-livestatus/livestatus.so @cacheDir@/live +#broker_module=/somewhere/module1.o +#broker_module=/somewhere/module2.o arg1 arg2=3 debug=0 + +# In order to provide drop-in support for new modules, you can also make use of +# the include_dir directive. The include_dir directive causes Naemon to parse +# any configuration (not just object configuration, as with cfg_dir) as if the +# contents of the files in the pointed-to directory were included on this line. +# The path to the directory is relative to the path of the main naemon.cfg +# file. +#include_dir=module-conf.d + +# LOG ARCHIVE PATH +# This is the directory where archived (rotated) log files are placed by the +# logrotate daemon. It is used by out of core add-ons to discover the logfiles. + +log_archive_path=@logDir@/archives + + + +# LOGGING OPTIONS +# If you want messages logged to the syslog facility, as well as the +# Naemon log file set this option to 1. If not, set it to 0. 
+ +use_syslog=0 + +# NOTIFICATION LOGGING OPTION +# If you don't want notifications to be logged, set this value to 0. +# If notifications should be logged, set the value to 1. + +log_notifications=1 + +# Notification suppression reason (NSR) logging causes the reason for a +# notification suppression to be logged, when it occurs. This can potentially +# add some noise to your log file, but is highly useful when troubleshooting +# missing notifications. + +enable_notification_suppression_reason_logging=1 + + +# SERVICE RETRY LOGGING OPTION +# If you don't want service check retries to be logged, set this value +# to 0. If retries should be logged, set the value to 1. + +log_service_retries=1 + + + +# HOST RETRY LOGGING OPTION +# If you don't want host check retries to be logged, set this value to +# 0. If retries should be logged, set the value to 1. + +log_host_retries=1 + + + +# EVENT HANDLER LOGGING OPTION +# If you don't want host and service event handlers to be logged, set +# this value to 0. If event handlers should be logged, set the value +# to 1. + +log_event_handlers=1 + + + +# INITIAL STATES LOGGING OPTION +# If you want Naemon to log all initial host and service states to +# the main log file (the first time the service or host is checked) +# you can enable this option by setting this value to 1. If you +# are not using an external application that does long term state +# statistics reporting, you do not need to enable this option. In +# this case, set the value to 0. + +log_initial_states=0 + + + +# CURRENT STATES LOGGING OPTION +# If you don't want Naemon to log all current host and service states +# after log has been rotated to the main log file, you can disable this +# option by setting this value to 0. Default value is 1. + +log_current_states=1 + + + +# EXTERNAL COMMANDS LOGGING OPTION +# If you don't want Naemon to log external commands, set this value +# to 0. If external commands should be logged, set this value to 1. 
+# Note: This option does not include logging of passive service +# checks - see the option below for controlling whether or not +# passive checks are logged. + +log_external_commands=1 + + + +# PASSIVE CHECKS LOGGING OPTION +# If you don't want Naemon to log passive host and service checks, set +# this value to 0. If passive checks should be logged, set +# this value to 1. + +log_passive_checks=1 + + + +# GLOBAL HOST AND SERVICE EVENT HANDLERS +# These options allow you to specify a host and service event handler +# command that is to be run for every host or service state change. +# The global event handler is executed immediately prior to the event +# handler that you have optionally specified in each host or +# service definition. The command argument is the short name of a +# command definition that you define in your host configuration file. +# Read the HTML docs for more information. + +#global_host_event_handler=somecommand +#global_service_event_handler=somecommand + + + +# MAXIMUM CONCURRENT SERVICE CHECKS +# This option allows you to specify the maximum number of +# service checks that can be run in parallel at any given time. +# Specifying a value of 1 for this variable essentially prevents +# any service checks from being parallelized. A value of 0 +# will not restrict the number of concurrent checks that are +# being executed. + +max_concurrent_checks=0 + + +# CHECK RESULT PATH +# This is directory where Naemon reads check results of host and +# service checks to further process them. +# +# Note: Naemon does not require this folder internally but it still +# can be used to pass check results to Naemon. + +check_result_path=@cacheDir@/checkresults + + +# CACHED HOST CHECK HORIZON +# This option determines the maximum amount of time (in seconds) +# that the state of a previous host check is considered current. 
+# Cached host states (from host checks that were performed more +# recently than the timeframe specified by this value) can immensely +# improve performance in regards to the host check logic. +# Too high of a value for this option may result in inaccurate host +# states being used by Naemon, while a lower value may result in a +# performance hit for host checks. Use a value of 0 to disable host +# check caching. + +cached_host_check_horizon=15 + + + +# CACHED SERVICE CHECK HORIZON +# This option determines the maximum amount of time (in seconds) +# that the state of a previous service check is considered current. +# Cached service states (from service checks that were performed more +# recently than the timeframe specified by this value) can immensely +# improve performance in regards to predictive dependency checks. +# Use a value of 0 to disable service check caching. + +cached_service_check_horizon=15 + + + +# ENABLE PREDICTIVE HOST DEPENDENCY CHECKS +# This option determines whether or not Naemon will attempt to execute +# checks of hosts when it predicts that future dependency logic tests +# may be needed. These predictive checks can help ensure that your +# host dependency logic works well. +# Values: +# 0 = Disable predictive checks +# 1 = Enable predictive checks (default) + +enable_predictive_host_dependency_checks=1 + + + +# ENABLE PREDICTIVE SERVICE DEPENDENCY CHECKS +# This option determines whether or not Naemon will attempt to execute +# checks of services when it predicts that future dependency logic tests +# may be needed. These predictive checks can help ensure that your +# service dependency logic works well. +# Values: +# 0 = Disable predictive checks +# 1 = Enable predictive checks (default) + +enable_predictive_service_dependency_checks=1 + + + +# SOFT STATE DEPENDENCIES +# This option determines whether or not Naemon will use soft state +# information when checking host and service dependencies. 
Normally +# Naemon will only use the latest hard host or service state when +# checking dependencies. If you want it to use the latest state (regardless +# of whether its a soft or hard state type), enable this option. +# Values: +# 0 = Don't use soft state dependencies (default) +# 1 = Use soft state dependencies + +soft_state_dependencies=0 + + + +# TIME CHANGE ADJUSTMENT THRESHOLDS +# These options determine when Naemon will react to detected changes +# in system time (either forward or backwards). + +#time_change_threshold=900 + + + +# TIMEOUT VALUES +# These options control how much time Naemon will allow various +# types of commands to execute before killing them off. Options +# are available for controlling maximum time allotted for +# service checks, host checks, event handlers, notifications, the +# ocsp command, and performance data commands. All values are in +# seconds. + +service_check_timeout=60 +host_check_timeout=30 +event_handler_timeout=30 +notification_timeout=30 +ocsp_timeout=5 +perfdata_timeout=5 + + + +# RETAIN STATE INFORMATION +# This setting determines whether or not Naemon will save state +# information for services and hosts before it shuts down. Upon +# startup Naemon will reload all saved service and host state +# information before starting to monitor. This is useful for +# maintaining long-term data on state statistics, etc, but will +# slow Naemon down a bit when it (re)starts. Since its only +# a one-time penalty, I think its well worth the additional +# startup delay. + +retain_state_information=1 + + + +# STATE RETENTION FILE +# This is the file that Naemon should use to store host and +# service state information before it shuts down. The state +# information in this file is also read immediately prior to +# starting to monitor the network when Naemon is restarted. +# This file is used only if the retain_state_information +# variable is set to 1. 
+ +state_retention_file=@varDir@/retention.dat + + + +# RETENTION DATA UPDATE INTERVAL +# This setting determines how often (in minutes) that Naemon +# will automatically save retention data during normal operation. +# If you set this value to 0, Naemon will not save retention +# data at regular intervals, but it will still save retention +# data before shutting down or restarting. If you have disabled +# state retention, this option has no effect. + +retention_update_interval=60 + + + +# USE RETAINED PROGRAM STATE +# This setting determines whether or not Naemon will set +# program status variables based on the values saved in the +# retention file. If you want to use retained program status +# information, set this value to 1. If not, set this value +# to 0. + +use_retained_program_state=1 + + + +# USE RETAINED SCHEDULING INFO +# This setting determines whether or not Naemon will retain +# the scheduling info (next check time) for hosts and services +# based on the values saved in the retention file. +# If you want to use retained scheduling info, set this +# value to 1. If not, set this value to 0. + +use_retained_scheduling_info=1 + + +# RETAINED_SCHEDULING_RANDOMIZE_WINDOW +# If use_retained_scheduling_info is enabled, this setting +# sets the window (in seconds), in which checks that were +# supposed to be executed during a restart are rescheduled. +# That is, if set to 60 seconds, then all checks that were +# missed due to a restart will be scheduled randomly to be +# executed in the first 60 seconds after a restart. +# If the retained_scheduling_randomize_window is larger than +# the object's check_interval, the check_interval is used +# instead. + +retained_scheduling_randomize_window=60 + + +# RETAINED ATTRIBUTE MASKS (ADVANCED FEATURE) +# The following variables are used to specify specific host and +# service attributes that should *not* be retained by Naemon during +# program restarts. 
+# +# The values of the masks are bitwise ORs of values specified +# by the "MODATTR_" definitions found in include/common.h. +# For example, if you do not want the current enabled/disabled state +# of flap detection and event handlers for hosts to be retained, you +# would use a value of 24 for the host attribute mask... +# MODATTR_EVENT_HANDLER_ENABLED (8) + MODATTR_FLAP_DETECTION_ENABLED (16) = 24 + +# This mask determines what host attributes are not retained +retained_host_attribute_mask=0 + +# This mask determines what service attributes are not retained +retained_service_attribute_mask=0 + +# These two masks determine what process attributes are not retained. +# There are two masks, because some process attributes have host and service +# options. For example, you can disable active host checks, but leave active +# service checks enabled. +retained_process_host_attribute_mask=0 +retained_process_service_attribute_mask=0 + +# These two masks determine what contact attributes are not retained. +# There are two masks, because some contact attributes have host and +# service options. For example, you can disable host notifications for +# a contact, but leave service notifications enabled for them. +retained_contact_host_attribute_mask=0 +retained_contact_service_attribute_mask=0 + + + +# INTERVAL LENGTH +# This is the seconds per unit interval as used in the +# host/contact/service configuration files. Setting this to 60 means +# that each interval is one minute long (60 seconds). Other settings +# have not been tested much, so your mileage is likely to vary... + +interval_length=60 + + + +# AGGRESSIVE HOST CHECKING OPTION +# If you don't want to turn on aggressive host checking features, set +# this value to 0 (the default). Otherwise set this value to 1 to +# enable the aggressive check option. 
Read the docs for more info +# on what aggressive host check is or check out the source code in +# base/checks.c + +use_aggressive_host_checking=0 + + + +# SERVICE CHECK EXECUTION OPTION +# This determines whether or not Naemon will actively execute +# service checks when it initially starts. If this option is +# disabled, checks are not actively made, but Naemon can still +# receive and process passive check results that come in. Unless +# you're implementing redundant hosts or have a special need for +# disabling the execution of service checks, leave this enabled! +# Values: 1 = enable checks, 0 = disable checks + +execute_service_checks=1 + + + +# PASSIVE SERVICE CHECK ACCEPTANCE OPTION +# This determines whether or not Naemon will accept passive +# service checks results when it initially (re)starts. +# Values: 1 = accept passive checks, 0 = reject passive checks + +accept_passive_service_checks=1 + + + +# HOST CHECK EXECUTION OPTION +# This determines whether or not Naemon will actively execute +# host checks when it initially starts. If this option is +# disabled, checks are not actively made, but Naemon can still +# receive and process passive check results that come in. Unless +# you're implementing redundant hosts or have a special need for +# disabling the execution of host checks, leave this enabled! +# Values: 1 = enable checks, 0 = disable checks + +execute_host_checks=1 + + + +# PASSIVE HOST CHECK ACCEPTANCE OPTION +# This determines whether or not Naemon will accept passive +# host checks results when it initially (re)starts. +# Values: 1 = accept passive checks, 0 = reject passive checks + +accept_passive_host_checks=1 + + + +# NOTIFICATIONS OPTION +# This determines whether or not Naemon will send out any host or +# service notifications when it is initially (re)started. 
+# Values: 1 = enable notifications, 0 = disable notifications + +enable_notifications=1 + + + +# EVENT HANDLER USE OPTION +# This determines whether or not Naemon will run any host or +# service event handlers when it is initially (re)started. Unless +# you're implementing redundant hosts, leave this option enabled. +# Values: 1 = enable event handlers, 0 = disable event handlers + +enable_event_handlers=1 + + + +# PROCESS PERFORMANCE DATA OPTION +# This determines whether or not Naemon will process performance +# data returned from service and host checks. If this option is +# enabled, host performance data will be processed using the +# host_perfdata_command (defined below) and service performance +# data will be processed using the service_perfdata_command (also +# defined below). Read the HTML docs for more information on +# performance data. +# Values: 1 = process performance data, 0 = do not process performance data + +process_performance_data=0 + + + +# HOST AND SERVICE PERFORMANCE DATA PROCESSING COMMANDS +# These commands are run after every host and service check is +# performed. These commands are executed only if the +# process_performance_data option (above) is set to 1. The command +# argument is the short name of a command definition that you +# define in your host configuration file. Read the HTML docs for +# more information on performance data. + +#host_perfdata_command=process-host-perfdata +#service_perfdata_command=process-service-perfdata + + + +# HOST AND SERVICE PERFORMANCE DATA FILES +# These files are used to store host and service performance data. +# Performance data is only written to these files if the +# process_performance_data option (above) is set to 1. + +#host_perfdata_file=@varDir@/host-perfdata +#service_perfdata_file=@varDir@/service-perfdata + + + +# HOST AND SERVICE PERFORMANCE DATA FILE TEMPLATES +# These options determine what data is written (and how) to the +# performance data files. 
The templates may contain macros, special +# characters (\t for tab, \r for carriage return, \n for newline) +# and plain text. A newline is automatically added after each write +# to the performance data file. Some examples of what you can do are +# shown below. + +#host_perfdata_file_template=[HOSTPERFDATA]\t$TIMET$\t$HOSTNAME$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$ +#service_perfdata_file_template=[SERVICEPERFDATA]\t$TIMET$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$ + + + +# HOST AND SERVICE PERFORMANCE DATA FILE MODES +# This option determines whether or not the host and service +# performance data files are opened in write ("w") or append ("a") +# mode. If you want to use named pipes, you should use the special +# pipe ("p") mode which avoids blocking at startup, otherwise you will +# likely want the default append ("a") mode. + +#host_perfdata_file_mode=a +#service_perfdata_file_mode=a + + + +# HOST AND SERVICE PERFORMANCE DATA FILE PROCESSING INTERVAL +# These options determine how often (in seconds) the host and service +# performance data files are processed using the commands defined +# below. A value of 0 indicates the files should not be periodically +# processed. + +#host_perfdata_file_processing_interval=0 +#service_perfdata_file_processing_interval=0 + + + +# HOST AND SERVICE PERFORMANCE DATA FILE PROCESSING COMMANDS +# These commands are used to periodically process the host and +# service performance data files. The interval at which the +# processing occurs is determined by the options above. + +#host_perfdata_file_processing_command=process-host-perfdata-file +#service_perfdata_file_processing_command=process-service-perfdata-file + + + +# HOST AND SERVICE PERFORMANCE DATA PROCESS EMPTY RESULTS +# These options determine whether the core will process empty perfdata +# results or not. This is needed for distributed monitoring, and intentionally +# turned on by default. 
+# If you don't require empty perfdata - saving some cpu cycles +# on unwanted macro calculation - you can turn that off. Be careful! +# Values: 1 = enable, 0 = disable + +#host_perfdata_process_empty_results=1 +#service_perfdata_process_empty_results=1 + + +# OBSESS OVER SERVICE CHECKS OPTION +# This determines whether or not Naemon will obsess over service +# checks and run the ocsp_command defined below. Unless you're +# planning on implementing distributed monitoring, do not enable +# this option. Read the HTML docs for more information on +# implementing distributed monitoring. +# Values: 1 = obsess over services, 0 = do not obsess (default) + +obsess_over_services=0 + + + +# OBSESSIVE COMPULSIVE SERVICE PROCESSOR COMMAND +# This is the command that is run for every service check that is +# processed by Naemon. This command is executed only if the +# obsess_over_services option (above) is set to 1. The command +# argument is the short name of a command definition that you +# define in your host configuration file. Read the HTML docs for +# more information on implementing distributed monitoring. + +#ocsp_command=somecommand + + + +# OBSESS OVER HOST CHECKS OPTION +# This determines whether or not Naemon will obsess over host +# checks and run the ochp_command defined below. Unless you're +# planning on implementing distributed monitoring, do not enable +# this option. Read the HTML docs for more information on +# implementing distributed monitoring. +# Values: 1 = obsess over hosts, 0 = do not obsess (default) + +obsess_over_hosts=0 + + + +# OBSESSIVE COMPULSIVE HOST PROCESSOR COMMAND +# This is the command that is run for every host check that is +# processed by Naemon. This command is executed only if the +# obsess_over_hosts option (above) is set to 1. The command +# argument is the short name of a command definition that you +# define in your host configuration file. Read the HTML docs for +# more information on implementing distributed monitoring. 
+ +#ochp_command=somecommand + + + +# TRANSLATE PASSIVE HOST CHECKS OPTION +# This determines whether or not Naemon will translate +# DOWN/UNREACHABLE passive host check results into their proper +# state for this instance of Naemon. This option is useful +# if you have distributed or failover monitoring setup. In +# these cases your other Naemon servers probably have a different +# "view" of the network, with regards to the parent/child relationship +# of hosts. If a distributed monitoring server thinks a host +# is DOWN, it may actually be UNREACHABLE from the point of +# this Naemon instance. Enabling this option will tell Naemon +# to translate any DOWN or UNREACHABLE host states it receives +# passively into the correct state from the view of this server. +# Values: 1 = perform translation, 0 = do not translate (default) + +translate_passive_host_checks=0 + + + +# PASSIVE HOST CHECKS ARE SOFT OPTION +# This determines whether or not Naemon will treat passive host +# checks as being HARD or SOFT. By default, a passive host check +# result will put a host into a HARD state type. This can be changed +# by enabling this option. +# Values: 0 = passive checks are HARD, 1 = passive checks are SOFT + +passive_host_checks_are_soft=0 + + + +# ORPHANED HOST/SERVICE CHECK OPTIONS +# These options determine whether or not Naemon will periodically +# check for orphaned host service checks. Since service checks are +# not rescheduled until the results of their previous execution +# instance are processed, there exists a possibility that some +# checks may never get rescheduled. A similar situation exists for +# host checks, although the exact scheduling details differ a bit +# from service checks. Orphaned checks seem to be a rare +# problem and should not happen under normal circumstances. +# If you have problems with service checks never getting +# rescheduled, make sure you have orphaned service checks enabled. 
+# Values: 1 = enable checks, 0 = disable checks + +check_for_orphaned_services=1 +check_for_orphaned_hosts=1 + + + +# SERVICE FRESHNESS CHECK OPTION +# This option determines whether or not Naemon will periodically +# check the "freshness" of service results. Enabling this option +# is useful for ensuring passive checks are received in a timely +# manner. +# Values: 1 = enabled freshness checking, 0 = disable freshness checking + +check_service_freshness=1 + + + +# SERVICE FRESHNESS CHECK INTERVAL +# This setting determines how often (in seconds) Naemon will +# check the "freshness" of service check results. If you have +# disabled service freshness checking, this option has no effect. + +service_freshness_check_interval=60 + + + +# SERVICE CHECK TIMEOUT STATE +# This setting determines the state Naemon will report when a +# service check times out - that is does not respond within +# service_check_timeout seconds. This can be useful if a +# machine is running at too high a load and you do not want +# to consider a failed service check to be critical (the default). +# Valid settings are: +# c - Critical (default) +# u - Unknown +# w - Warning +# o - OK + +service_check_timeout_state=c + + + +# HOST FRESHNESS CHECK OPTION +# This option determines whether or not Naemon will periodically +# check the "freshness" of host results. Enabling this option +# is useful for ensuring passive checks are received in a timely +# manner. +# Values: 1 = enabled freshness checking, 0 = disable freshness checking + +check_host_freshness=0 + + + +# HOST FRESHNESS CHECK INTERVAL +# This setting determines how often (in seconds) Naemon will +# check the "freshness" of host check results. If you have +# disabled host freshness checking, this option has no effect. 
+ +host_freshness_check_interval=60 + + + + +# ADDITIONAL FRESHNESS THRESHOLD LATENCY +# This setting determines the number of seconds that Naemon +# will add to any host and service freshness thresholds that +# it calculates (those not explicitly specified by the user). + +additional_freshness_latency=15 + + + + +# FLAP DETECTION OPTION +# This option determines whether or not Naemon will try +# and detect hosts and services that are "flapping". +# Flapping occurs when a host or service changes between +# states too frequently. When Naemon detects that a +# host or service is flapping, it will temporarily suppress +# notifications for that host/service until it stops +# flapping. Flap detection is very experimental, so read +# the HTML documentation before enabling this feature! +# Values: 1 = enable flap detection +# 0 = disable flap detection (default) + +enable_flap_detection=1 + + + +# FLAP DETECTION THRESHOLDS FOR HOSTS AND SERVICES +# Read the HTML documentation on flap detection for +# an explanation of what this option does. This option +# has no effect if flap detection is disabled. + +low_service_flap_threshold=5.0 +high_service_flap_threshold=20.0 +low_host_flap_threshold=5.0 +high_host_flap_threshold=20.0 + + + +# DATE FORMAT OPTION +# This option determines how short dates are displayed. Valid options +# include: +# us (MM-DD-YYYY HH:MM:SS) +# euro (DD-MM-YYYY HH:MM:SS) +# iso8601 (YYYY-MM-DD HH:MM:SS) +# strict-iso8601 (YYYY-MM-DDTHH:MM:SS) +# + +date_format=us + + + + +# TIMEZONE OFFSET +# This option is used to override the default timezone that this +# instance of Naemon runs in. If not specified, Naemon will use +# the system configured timezone. + +#use_timezone=US/Mountain +#use_timezone=Australia/Brisbane + + + +# ILLEGAL OBJECT NAME CHARACTERS +# This option allows you to specify illegal characters that cannot +# be used in host names, service descriptions, or names of other +# object types. 
+ +illegal_object_name_chars=`~!$%^&*|'"<>?,()= + + + +# ILLEGAL MACRO OUTPUT CHARACTERS +# This option allows you to specify illegal characters that are +# stripped from macros before being used in notifications, event +# handlers, etc. This DOES NOT affect macros used in service or +# host check commands. +# The following macros are stripped of the characters you specify: +# $HOSTOUTPUT$ +# $HOSTPERFDATA$ +# $HOSTACKAUTHOR$ +# $HOSTACKCOMMENT$ +# $SERVICEOUTPUT$ +# $SERVICEPERFDATA$ +# $SERVICEACKAUTHOR$ +# $SERVICEACKCOMMENT$ + +illegal_macro_output_chars=`~$&|'"<> + + + +# REGULAR EXPRESSION MATCHING +# This option controls whether or not regular expression matching +# takes place in the object config files. Regular expression +# matching is used to match host, hostgroup, service, and service +# group names/descriptions in some fields of various object types. +# Values: 1 = enable regexp matching, 0 = disable regexp matching + +use_regexp_matching=0 + + + +# "TRUE" REGULAR EXPRESSION MATCHING +# This option controls whether or not "true" regular expression +# matching takes place in the object config files. This option +# only has an effect if regular expression matching is enabled +# (see above). If this option is DISABLED, regular expression +# matching only occurs if a string contains wildcard characters +# (* and ?). If the option is ENABLED, regexp matching occurs +# all the time (which can be annoying). +# Values: 1 = enable true matching, 0 = disable true matching + +use_true_regexp_matching=0 + + + +# ADMINISTRATOR EMAIL/PAGER ADDRESSES +# The email and pager address of a global administrator (likely you). +# Naemon never uses these values itself, but you can access them by +# using the $ADMINEMAIL$ and $ADMINPAGER$ macros in your notification +# commands. + +admin_email=naemon@localhost +admin_pager=pagenaemon@localhost + + + +# DEBUG LEVEL +# This option determines how much (if any) debugging information will +# be written to the debug file. 
OR values together to log multiple +# types of information. +# Values: +# -1 = Everything +# 0 = Nothing +# 1 = Functions +# 2 = Configuration +# 4 = Process information +# 8 = Scheduled events +# 16 = Host/service checks +# 32 = Notifications +# 64 = Event broker +# 128 = External commands +# 256 = Commands +# 512 = Scheduled downtime +# 1024 = Comments +# 2048 = Macros + +debug_level=0 + + + +# DEBUG VERBOSITY +# This option determines how verbose the debug log out will be. +# Values: 0 = Brief output +# 1 = More detailed +# 2 = Very detailed + +debug_verbosity=1 + + + +# DEBUG FILE +# This option determines where Naemon should write debugging information. + +debug_file=@cacheDir@/naemon.debug + + + +# MAX DEBUG FILE SIZE +# This option determines the maximum size (in bytes) of the debug file. If +# the file grows larger than this size, it will be renamed with a .old +# extension. If a file already exists with a .old extension it will +# automatically be deleted. This helps ensure your disk space usage doesn't +# get out of control when debugging Naemon. + +max_debug_file_size=1000000 + + + +# Should we allow hostgroups to have no hosts, we default this to off since +# that was the old behavior + +allow_empty_hostgroup_assignment=0 + + + +# Normally worker count is dynamically allocated based on 1.5 * number of cpu's +# with a minimum of 4 workers. This value will override the defaults + +#check_workers=3 + + +# DISABLE SERVICE CHECKS WHEN HOST DOWN +# This option will disable all service checks if the host is not in an UP state +# +# While desirable in some environments, enabling this value can distort report +# values as the expected quantity of checks will not have been performed + +#host_down_disable_service_checks=0 + + +# CIRCULAR DEPENDENCIES (EXPERIMENTAL) +# Allow for circular dependencies in naemon's host graph. 
+# Enabling this will cause propagation of the following to stop working: +# * scheduling downtime +# * enabling notification +# * disabling notification +# This feature is experimental and bugs might occur. + +allow_circular_dependencies=0 diff --git a/modules/private/default.nix b/modules/private/default.nix index 552ee8c..5f97f7f 100644 --- a/modules/private/default.nix +++ b/modules/private/default.nix @@ -64,6 +64,7 @@ set = { ftp = ./ftp.nix; mpd = ./mpd.nix; ssh = ./ssh; + monitoring = ./monitoring; system = ./system.nix; }; diff --git a/modules/private/monitoring/conf/contacts.cfg b/modules/private/monitoring/conf/contacts.cfg new file mode 100644 index 0000000..e088f69 --- /dev/null +++ b/modules/private/monitoring/conf/contacts.cfg @@ -0,0 +1,41 @@ +# vim: filetype=nagios + +# CONTACT GROUPS +define contactgroup { + contactgroup_name admins + alias Naemon Administrators +# members immae +} + +# No contact, we go through master +# define contact { +# contact_name immae +# alias Immae +# use generic-contact +# email ismael@bouya.org +# } +# +# define contact { +# name generic-contact +# host_notification_commands notify-host-by-email +# host_notification_options d,u,r,f,s +# host_notification_period 24x7 +# register 0 +# service_notification_commands notify-service-by-email +# service_notification_options w,u,c,r,f,s +# service_notification_period 24x7 +# } +# +# define command { +# command_name notify-host-by-email +# command_line SERVICENOTIFICATIONID="$SERVICENOTIFICATIONID$" HOSTSTATE="$HOSTSTATE$" HOSTOUTPUT="$HOSTOUTPUT$" $USER2$/notify_by_email host "$NOTIFICATIONTYPE$" "$HOSTALIAS$" "$LONGDATETIME$" "$CONTACTEMAIL$" $OVE +# #$OVE is to force naemon to run via shell instead of execve which fails here +# } +# +# # 'notify-service-by-email' command definition +# define command { +# command_name notify-service-by-email +# command_line SERVICENOTIFICATIONID="$SERVICENOTIFICATIONID$" SERVICEDESC="$SERVICEDESC$" SERVICESTATE="$SERVICESTATE$" 
SERVICEOUTPUT="$SERVICEOUTPUT$" $USER2$/notify_by_email service "$NOTIFICATIONTYPE$" "$HOSTALIAS$" "$LONGDATETIME$" "$CONTACTEMAIL$" $OVE +# # command_line sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email +# #$OVE is to force naemon to run via shell instead of execve which fails here +# } diff --git a/modules/private/monitoring/conf/hosts.cfg b/modules/private/monitoring/conf/hosts.cfg new file mode 100644 index 0000000..d903b0a --- /dev/null +++ b/modules/private/monitoring/conf/hosts.cfg @@ -0,0 +1,32 @@ +# vim: filetype=nagios + +define host { + name generic-host + event_handler_enabled 1 + flap_detection_enabled 1 + notification_period 24x7 + notifications_enabled 1 + process_perf_data 1 + register 0 + retain_nonstatus_information 1 + retain_status_information 1 +} + +define host { + name linux-server + use generic-host + check_command check-host-alive + check_interval 5 + check_period 24x7 + contact_groups admins + max_check_attempts 10 + notification_interval 120 + notification_options d,u,r,f + register 0 + retry_interval 1 +} + +define command { + command_name check-host-alive + command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5 +} diff --git a/modules/private/monitoring/conf/local_services.cfg b/modules/private/monitoring/conf/local_services.cfg new file mode 100644 index 0000000..56bc8f6 --- /dev/null +++ b/modules/private/monitoring/conf/local_services.cfg @@ -0,0 +1,68 @@ +# vim: filetype=nagios + +# System usage +define service { + service_description Size on root partition + use local-service + check_command check_local_disk!20%!10%!/ +} +define command { + command_line $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$ + command_name check_local_disk +} + +define service { + service_description Total number of process + use local-service + check_command check_local_procs!250!400!RSZDT +} +define command { + command_line $USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$ + command_name 
check_local_procs +} + +define service { + service_description Average load + use local-service + check_command check_local_load!8.0,8.0,8.0!10.0,10.0,10.0 +} +define command { + command_line $USER1$/check_load -w $ARG1$ -c $ARG2$ + command_name check_local_load +} + +define service { + service_description Swap usage + use local-service + check_command check_local_swap!20!10 +} +define command { + command_line $USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$ + command_name check_local_swap +} + +define service { + service_description Memory usage + use local-service + check_command check_memory!80!90 +} +define command { + command_line $USER2$/check_mem.sh -w $ARG1$ -c $ARG2$ + command_name check_memory +} + +define command { + command_line $USER2$/check_command -c "$ARG1$" -s 0 -o "$ARG2$" $ARG3$ + command_name check_command_output +} + +# Network dependent local services +define service { + service_description NTP is activated and working + use local-service + check_command check_ntp +} +define command { + command_line $USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org + command_name check_ntp +} diff --git a/modules/private/monitoring/conf/notify.cfg b/modules/private/monitoring/conf/notify.cfg new file mode 100644 index 0000000..63b380d --- /dev/null +++ b/modules/private/monitoring/conf/notify.cfg @@ -0,0 +1,8 @@ +# vim: filetype=nagios + +define command { + command_line /etc/naemon/send_nrdp.sh -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$" + command_name notify-master +} + + diff --git a/modules/private/monitoring/conf/objects.cfg b/modules/private/monitoring/conf/objects.cfg new file mode 100644 index 0000000..653477f --- /dev/null +++ b/modules/private/monitoring/conf/objects.cfg @@ -0,0 +1,84 @@ +# vim: filetype=nagios + +define command { + command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5 + command_name check-host-alive +} + +define command { + command_line $USER2$/check_md_raid + 
command_name check_md_raid +} + +define command { + command_line $USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$ + command_name check_command_output +} + + +define command { + command_line /usr/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$" + command_name check_postgresql_replication +} + +define service { + ## --PUPPET_NAME-- (called '_naginator_name' in the manifest) Databases are present in postgresql + active_checks_enabled 1 + check_command check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace'!public!-r postgres + check_freshness 0 + check_interval 5 + check_period 24x7 + contact_groups admins + event_handler_enabled 1 + flap_detection_enabled 1 + host_name caldance-1.v.immae.eu + is_volatile 0 + max_check_attempts 4 + notification_interval 60 + notification_options w,u,c,r + notification_period 24x7 + notifications_enabled 0 + obsess_over_service 1 + passive_checks_enabled 1 + process_perf_data 1 + retain_nonstatus_information 1 + retain_status_information 1 + retry_interval 1 + service_description Databases are present in postgresql +} + +define command { + command_line $USER2$/check_last_file_date "$ARG1$" "$ARG2$" "$ARG3$" + command_name check_last_file_date +} + +define command { + command_line $USER2$/check_date "$ARG1$" "$ARG2$" "$ARG3$" + command_name check_date +} + +define service { + ## --PUPPET_NAME-- (called '_naginator_name' in the manifest) Postgresql replication for backup-1 is up to date + active_checks_enabled 1 + check_command check_postgresql_replication!backup-1!/run/postgresql!5432 + check_freshness 0 + check_interval 5 + check_period 24x7 + contact_groups admins + event_handler_enabled 1 + flap_detection_enabled 1 + host_name caldance-1.v.immae.eu + is_volatile 0 + max_check_attempts 4 + notification_interval 60 + notification_options w,u,c,r + notification_period 24x7 + notifications_enabled 0 + obsess_over_service 1 + passive_checks_enabled 1 + process_perf_data 1 + 
retain_nonstatus_information 1 + retain_status_information 1 + retry_interval 1 + service_description Postgresql replication for backup-1 is up to date +} diff --git a/modules/private/monitoring/conf/services.cfg b/modules/private/monitoring/conf/services.cfg new file mode 100644 index 0000000..0740dc7 --- /dev/null +++ b/modules/private/monitoring/conf/services.cfg @@ -0,0 +1,27 @@ +# vim: filetype=nagios + +define service { + name generic-service + active_checks_enabled 1 + check_freshness 0 + check_interval 10 + check_period 24x7 + contact_groups admins + event_handler_enabled 1 + flap_detection_enabled 1 + is_volatile 0 + max_check_attempts 3 + notification_interval 60 + notification_options w,u,c,r,f + notification_period 24x7 + # no notification since we send them to master + notifications_enabled 0 + obsess_over_service 1 + passive_checks_enabled 1 + process_perf_data 1 + register 0 + retain_nonstatus_information 1 + retain_status_information 1 + retry_interval 2 +} + diff --git a/modules/private/monitoring/conf/timeperiods.cfg b/modules/private/monitoring/conf/timeperiods.cfg new file mode 100644 index 0000000..5ffe4ca --- /dev/null +++ b/modules/private/monitoring/conf/timeperiods.cfg @@ -0,0 +1,15 @@ +# vim: filetype=nagios + +define timeperiod { + alias 24 Hours A Day, 7 Days A Week + friday 00:00-24:00 + monday 00:00-24:00 + saturday 00:00-24:00 + sunday 00:00-24:00 + thursday 00:00-24:00 + timeperiod_name 24x7 + tuesday 00:00-24:00 + wednesday 00:00-24:00 +} + + diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix new file mode 100644 index 0000000..11861ad --- /dev/null +++ b/modules/private/monitoring/default.nix @@ -0,0 +1,111 @@ +{ config, myconfig, pkgs, lib, ... 
}: +let + myplugins = pkgs.runCommand "buildplugins" { + buildInputs = [ pkgs.makeWrapper pkgs.perl ]; + } '' + mkdir $out + cp ${./plugins}/* $out/ + patchShebangs $out + wrapProgram $out/check_command --prefix PATH : ${config.security.wrapperDir} + wrapProgram $out/send_nrdp.sh --prefix PATH : ${lib.makeBinPath [ + pkgs.curl pkgs.which pkgs.coreutils + ]} + wrapProgram $out/check_mem.sh --prefix PATH : ${lib.makeBinPath [ + pkgs.gnugrep pkgs.gawk pkgs.procps-ng + ]} + ''; +in +{ + options = { + myServices.monitoring.enable = lib.mkOption { + type = lib.types.bool; + default = false; + description = '' + Whether to enable monitoring. + ''; + }; + }; + + config = lib.mkIf config.myServices.monitoring.enable { + security.sudo.extraRules = [ + { + commands = [ + { command = "${pkgs.mdadm}/bin/mdadm --monitor --scan -1"; options = [ "NOPASSWD" ]; } + { command = "${pkgs.postfix}/bin/mailq"; options = [ "NOPASSWD" ]; } + ]; + users = [ "naemon" ]; + runAs = "root"; + } + ]; + environment.etc."mdadm.conf" = { + enable = true; + mode = "0644"; + user = "root"; + text = "MAILADDR naemon@immae.eu"; + }; + + # needed since extraResource is not in the closure + systemd.services.naemon.path = [ myplugins ]; + services.naemon = { + enable = true; + extraConfig = '' + broker_module=${pkgs.naemon-livestatus}/lib/naemon-livestatus/livestatus.so ${config.services.naemon.runDir}/live + use_syslog=1 + log_initial_states=1 + date_format=iso8601 + admin_email=naemon@immae.eu + + obsess_over_services=1 + ocsp_command=notify-master + ''; + extraResource = '' + $USER2$=${myplugins} + $USER200$=${myconfig.env.monitoring.status_url} + $USER201$=${myconfig.env.monitoring.status_token} + ''; + objectDefs = builtins.readFile ./conf/local_services.cfg + + builtins.readFile ./conf/timeperiods.cfg + + builtins.readFile ./conf/services.cfg + + builtins.readFile ./conf/contacts.cfg + + builtins.readFile ./conf/hosts.cfg + + '' + define command { + command_line ${myplugins}/send_nrdp.sh -u 
"$USER200$" -t "$USER201$" -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$" + command_name notify-master + } + define service { + service_description No mdadm array is degraded + use local-service + check_command check_command_output!${pkgs.mdadm}/bin/mdadm --monitor --scan -1!^$!-s 0 -r root + } + + define service { + service_description mailq is empty + use local-service + check_command check_mailq + } + + define command { + command_name check_mailq + command_line $USER1$/check_mailq -s -w 1 -c 2 + } + + define service { + name local-service + use generic-service + host_name eldiron.immae.eu + check_interval 5 + max_check_attempts 4 + register 0 + retry_interval 1 + } + define host { + host_name eldiron.immae.eu + alias eldiron.immae.eu + address eldiron.immae.eu + use linux-server + } + ''; + }; + }; +} diff --git a/modules/private/monitoring/plugins/check_command b/modules/private/monitoring/plugins/check_command new file mode 100755 index 0000000..55779fd --- /dev/null +++ b/modules/private/monitoring/plugins/check_command @@ -0,0 +1,113 @@ +#!/usr/bin/env perl + +use strict; +use Getopt::Std; +$| = 1; + +my %opts; +getopts('hr:C:c:s:o:', \%opts); + +my $STATE_OK = 0; +my $STATE_WARNING = 1; +my $STATE_CRITICAL = 2; +my $STATE_UNKNOWN = 3; + +if ($opts{'h'} || scalar(%opts) == 0) { + &print_help(); + exit($STATE_OK); +} + +my $command = $opts{'c'}; +if ($command eq '') { + print "You must provide a command to check.\n"; + exit($STATE_UNKNOWN); +} + +my $expected_output = $opts{'o'}; +my $expected_status = $opts{'s'}; +my $other_command = $opts{'C'}; + +if ($other_command eq '' and $expected_status eq '' and $expected_output eq '') { + $expected_status = 0; +} + +my $cmd = $command . ' 2>&1'; +my $other_cmd; +if ($other_command ne '') { + $other_cmd = $other_command . 
' 2>&1'; +} + +my $run_as; +if ($opts{'r'}) { + $run_as = $opts{'r'}; + $cmd = "sudo -u $run_as -n $cmd"; + + if ($other_command ne '') { + $other_cmd = "sudo -u $run_as -n $other_cmd"; + } + +} + +my $cmd_result = `$cmd`; my $cmd_status = ($? & 127) ? 128 + ($? & 127) : $? >> 8; # decoded exit code of $cmd (128+signal if killed), saved before the -C backtick below overwrites $? +my $other_cmd_result; +if ($other_command ne '') { + $other_cmd_result = `$other_cmd`; + chomp($other_cmd_result); +} + +chomp($cmd_result); +if ($cmd_result =~ /sudo/i) { + print "$command CRITICAL - No sudo right to run the command\n"; + exit($STATE_UNKNOWN); +} elsif ($expected_status ne '') { + if ($cmd_status != $expected_status) { + print "$command CRITICAL - Response status $cmd_status\n"; + exit($STATE_CRITICAL); + } else { + print "$command OK - Response status $cmd_status\n"; + exit($STATE_OK); + } +} elsif ($other_command ne '') { + if ($cmd_result ne $other_cmd_result) { + print "$command CRITICAL - Expected output not matching other command output\n"; + exit($STATE_CRITICAL); + } else { + print "$command OK - Expected output matching other command output\n"; + exit($STATE_OK); + } +} else { + if ($cmd_result !~ /$expected_output/) { + print "$command CRITICAL - Expected output not matching\n"; + exit($STATE_CRITICAL); + } else { + print "$command OK - Expected output matching\n"; + exit($STATE_OK); + } +} + +sub print_help() { + print << "EOF"; +Check whether the given command responds as expected. One of -o -C or -s must be selected. + +Options: +-h + Print detailed help screen + +-c + command to run (required) + +-C + other command to compare output + +-r user + Run as user via sudo. 
+ +-s + status code to check + +-o + output to check + +EOF +} + diff --git a/modules/private/monitoring/plugins/check_mem.sh b/modules/private/monitoring/plugins/check_mem.sh new file mode 100755 index 0000000..cc97ae2 --- /dev/null +++ b/modules/private/monitoring/plugins/check_mem.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +if [ "$1" = "-w" ] && [ "$2" -gt "0" ] && [ "$3" = "-c" ] && [ "$4" -gt "0" ]; then + FreeM=`free -m` + memTotal_m=`echo "$FreeM" |grep Mem |awk '{print $2}'` + memUsed_m=`echo "$FreeM" |grep Mem |awk '{print $3}'` + memFree_m=`echo "$FreeM" |grep Mem |awk '{print $4}'` + memBuffer_m=`echo "$FreeM" |grep Mem |awk '{print $6}'` + memCache_m=`echo "$FreeM" |grep Mem |awk '{print $7}'` + memUsedPrc=`echo $((($memUsed_m*100)/$memTotal_m))||cut -d. -f1` + if [ "$memUsedPrc" -ge "$4" ]; then + echo "Memory: CRITICAL Total: $memTotal_m MB - Used: $memUsed_m MB - $memUsedPrc% used!|TOTAL=$memTotal_m;;;; USED=$memUsed_m;;;; CACHE=$memCache_m;;;; BUFFER=$memBuffer_m;;;;" + exit 2 + elif [ "$memUsedPrc" -ge "$2" ]; then + echo "Memory: WARNING Total: $memTotal_m MB - Used: $memUsed_m MB - $memUsedPrc% used!|TOTAL=$memTotal_m;;;; USED=$memUsed_m;;;; CACHE=$memCache_m;;;; BUFFER=$memBuffer_m;;;;" + exit 1 + else + echo "Memory: OK Total: $memTotal_m MB - Used: $memUsed_m MB - $memUsedPrc% used|TOTAL=$memTotal_m;;;; USED=$memUsed_m;;;; CACHE=$memCache_m;;;; BUFFER=$memBuffer_m;;;;" + exit 0 + fi +else # If inputs are not as expected, print help. 
+ sName="`echo $0|awk -F '/' '{print $NF}'`" + echo -e "\n\n\t\t### $sName Version 2.0###\n" + echo -e "# Usage:\t$sName -w <warnlevel> -c <critlevel>" + echo -e "\t\t= warnlevel and critlevel is percentage value without %\n" + echo "# EXAMPLE:\t/usr/lib64/nagios/plugins/$sName -w 80 -c 90" + echo -e "\nCopyright (C) 2012 Lukasz Gogolin (lukasz.gogolin@gmail.com), improved by Nestor 2015\n\n" + exit 3 # UNKNOWN: a bare exit would return the last echo's status (0) and report bad arguments as OK +fi diff --git a/modules/private/monitoring/plugins/notify_by_email b/modules/private/monitoring/plugins/notify_by_email new file mode 100755 index 0000000..ad0dcc7 --- /dev/null +++ b/modules/private/monitoring/plugins/notify_by_email @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +# $1 = service/host + +# $2 = type (PROBLEM RECOVERY ACKNOWLEDGEMENT FLAPPINGSTART FLAPPINGSTOP FLAPPINGDISABLED DOWNTIMESTART DOWNTIMESTOP DOWNTIMECANCELLED) +# http://www.naemon.org/documentation/usersguide/macrolist.html#notificationtype + +# $3 = host alias + +# $4 = date (YYYY-MM-DDTHH:MM:SS) + +# $5 = E-mail + +NOTIFICATION_TYPE="$2" +HOST_ALIAS="$3" +DATE="$4" +CONTACT="$5" + +message="" + +if [ "$1" = "host" ]; then + message=$(printf "%b" "***** Naemon *****\n\nNotification Type: $NOTIFICATION_TYPE\n\nHost: $HOST_ALIAS\nState: $HOSTSTATE\nInfo: $HOSTOUTPUT\n\nDate/Time: $DATE\n") + subject="** $NOTIFICATION_TYPE Host Alert: $HOST_ALIAS is $HOSTSTATE **" +else + message=$(printf "%b" "***** Naemon *****\n\nNotification Type: $NOTIFICATION_TYPE\n\nService: $SERVICEDESC\nHost: $HOST_ALIAS\nState: $SERVICESTATE\n\nDate/Time: $DATE\n\nAdditional Info:\n\n$SERVICEOUTPUT\n") + subject="** $NOTIFICATION_TYPE Service Alert: $HOST_ALIAS/$SERVICEDESC is $SERVICESTATE **" +fi + +# sendwait waits for sendmail to finish its job, otherwise it continues in the +# background and gets killed too early +echo "$message" | MAILRC=/dev/null mail -r "naemon@immae.eu" -n -Ssendwait -s "$subject" "$CONTACT" diff --git a/modules/private/monitoring/plugins/send_nrdp.sh b/modules/private/monitoring/plugins/send_nrdp.sh new file mode 100755 
index 0000000..27e47b4 --- /dev/null +++ b/modules/private/monitoring/plugins/send_nrdp.sh @@ -0,0 +1,267 @@ +#!/bin/bash +# +# check_nrdp.sh +# +# Copyright (c) 2010-2017 - Nagios Enterprises, LLC. +# Written by: Scott Wilkerson (nagios@nagios.org) +# +# 2017-09-25 Troy Lea aka BOX293 +# - Fixed script not working with arguments when run as a cron job +# or if being used as a nagios command like obsessive compulsive. +# ... "if [ ! -t 0 ]" was the reason why. +# 2017-12-08 Jørgen van der Meulen (Conclusion Xforce) +# - Fixed typo in NRDP abbreviation + + +PROGNAME=$(basename $0) +RELEASE="Revision 0.6.1" + +print_release() { + echo "$RELEASE" +} + +print_usage() { + echo "" + echo "$PROGNAME $RELEASE - Send NRDP script for Nagios" + echo "" + echo "Usage: send_nrdp.sh -u URL -t token [options]" + echo "" + echo "Usage: $PROGNAME -h display help" + echo "" +} + +print_help() { + print_usage + echo "" + echo "This script is used to send NRDP data to a Nagios server" + echo "" + echo "Required:" + echo " -u"," URL of NRDP server. Usually http:///nrdp/" + echo " -t"," Shared token. Must be the same token set in NRDP Server" + echo "" + echo "Options:" + echo " Single Check:" + echo " -H host name" + echo " -s service name" + echo " -S State" + echo " -o output" + echo "" + echo " STDIN:" + echo " [-d delimiter] (default -d \"\\t\")" + echo " With only the required parameters $PROGNAME is capable of" + echo " processing data piped to it either from a file or other" + echo " process. By default, we use \t as the delimiter however this" + echo " may be specified with the -d option data should be in the" + echo " following formats one entry per line." 
+    echo " For Host checks:"
+    echo " hostname State output"
+    echo " For Service checks"
+    echo " hostname servicename State output"
+    echo ""
+    echo " File:"
+    echo " -f /full/path/to/file"
+    echo " This file will be sent to the NRDP server specified in -u"
+    echo " The file should be an XML file in the following format"
+    echo " ##################################################"
+    echo ""
+    echo " <?xml version='1.0'?>"
+    echo " <checkresults>"
+    echo "   <checkresult type='host' checktype='1'>"
+    echo "     <hostname>YOUR_HOSTNAME</hostname>"
+    echo "     <state>0</state>"
+    echo "     <output>OK|perfdata=1.00;5;10;0</output>"
+    echo "   </checkresult>"
+    echo "   <checkresult type='service' checktype='1'>"
+    echo "     <hostname>YOUR_HOSTNAME</hostname>"
+    echo "     <servicename>YOUR_SERVICENAME</servicename>"
+    echo "     <state>0</state>"
+    echo "     <output>OK|perfdata=1.00;5;10;0</output>"
+    echo "   </checkresult>"
+    echo " </checkresults>"
+    echo " ##################################################"
+    echo ""
+    echo " Directory:"
+    echo " -D /path/to/temp/dir"
+    echo " This is a directory that contains XML files in the format"
+    echo " above. Additionally, if the -D flag is specified, $PROGNAME"
+    echo " will create temp files here if the server could not be reached."
+    echo " On additional calls with the same -D path, if a connection to"
+    echo " the server is successful, all temp files will be sent."
+    exit 0
+}
+
+# send_data XML [SPOOLED_FILE]
+#   POSTs a submitcheck request to $url/ with $token: the contents of $file when
+#   -f was given, otherwise XML. SPOOLED_FILE is passed only while replaying the
+#   -D directory; it is removed on success and prevents re-spooling on failure.
+#   Exits 1 when curl fails and 2 when the server returns a non-zero status.
+#   NOTE(review): curl runs with --insecure (TLS verification off) - confirm intended.
+send_data() {
+    pdata="token=$token&cmd=submitcheck"
+    if [ "$file" ]; then
+        # hand the path to curl as one quoted argument so spaces in it survive
+        rslt=`curl -f --silent --insecure -d "$pdata" --data-urlencode "XMLDATA@$file" "$url/"`
+    else
+        pdata="$pdata&XMLDATA=$1"
+        rslt=`curl -f --silent --insecure -d "$pdata" "$url/"`
+    fi
+
+    ret=$?
+    # $rslt stays unquoted on purpose: the reply is flattened to one line for sed
+    status=`echo $rslt | sed -n 's|.*<status>\(.*\)</status>.*|\1|p'`
+    message=`echo $rslt | sed -n 's|.*<message>\(.*\)</message>.*|\1|p'`
+    if [ $ret != 0 ];then
+        echo "ERROR: could not connect to NRDP server at $url"
+        # verify we are not processing the directory already and then write to the directory
+        if [ ! "$2" ] && [ "$directory" ];then
+            if [ ! -d "$directory" ];then
+                mkdir -p "$directory"
+            fi
+            # This is where we write to the tmp directory
+            echo "$xml" > "$(mktemp "$directory/nrdp.XXXXXX")"
+        fi
+        exit 1
+    fi
+
+    if [ "$status" != "0" ];then
+        # The NRDP server was reached but rejected the data (or sent no parsable status)
+        echo "ERROR: The NRDP Server said $message"
+        # verify we are not processing the directory already and then write to the directory
+        if [ ! "$2" ] && [ "$directory" ];then
+            if [ ! -d "$directory" ];then
+                mkdir -p "$directory"
+            fi
+            # This is where we write to the tmp directory
+            echo "$xml" > "$(mktemp "$directory/nrdp.XXXXXX")"
+        fi
+
+        exit 2
+    fi
+
+    # If this was a directory call and was successful, remove the file
+    if [ "$2" ] && [ "$status" == "0" ];then
+        rm -f "$2"
+    fi
+}
+
+while getopts "u:t:H:s:S:o:f:d:c:D:hv" option
+do
+    case $option in
+        u) url=$OPTARG ;;
+        t) token=$OPTARG ;;
+        H) host=$OPTARG ;;
+        s) service=$OPTARG ;;
+        S) State=$OPTARG ;;
+        o) output=$OPTARG ;;
+        f) file=$OPTARG ;;
+        d) delim=$OPTARG ;;
+        c) checktype=$OPTARG ;;
+        D) directory=$OPTARG ;;
+        h) print_help 0;;
+        v) print_release
+           exit 0 ;;
+    esac
+done
+
+if [ -z "$checktype" ]; then
+    checktype=1
+fi
+if [ -z "$delim" ]; then
+    delim=`echo -e "\t"`
+fi
+
+if [ "x$url" == "x" -o "x$token" == "x" ]
+then
+    echo "Usage: send_nrdp -u url -t token"
+    exit 1
+fi
+# detecting curl (command -v is a shell builtin, so no external `which` is needed)
+if command -v curl >/dev/null 2>&1
+ then curl=1;
+fi
+
+if [[ ! $curl ]];
+then
+    echo "curl is required to run $PROGNAME"
+    exit 1
+fi
+
+checkcount=0
+
+if [ "$host" ]; then
+    xml="<?xml version='1.0'?><checkresults>"
+    # we are not getting piped results
+    if [ "$host" == "" ] || [ "$State" == "" ]; then
+        echo "You must provide a host -H and State -S"
+        exit 2
+    fi
+    if [ "$service" != "" ]; then
+        xml="$xml<checkresult type='service' checktype='$checktype'><servicename>$service</servicename>"
+    else
+        xml="$xml<checkresult type='host' checktype='$checktype'>"
+    fi
+
+    # urlencode XML special chars
+    output=${output//&/%26}
+    output=${output//</%3C}
+    output=${output//>/%3E}
+
+    xml="$xml<hostname>$host</hostname><state>$State</state><output><![CDATA[$output]]></output></checkresult>"
+    checkcount=1
+fi
+
+ # If only url and token have been provided then it is assumed that data is being piped
+########################
+if [[ ! $host && ! $State && ! $file && ! $directory ]]; then
+    xml="<?xml version='1.0'?><checkresults>"
+    # we know we are being piped results
+    # IFS is set for each read only, so the shell-wide IFS is never modified
+
+    while IFS=$delim read -r -a arr ; do
+        # read -a splits on $delim without glob-expanding the fields
+        if [ ${#arr[@]} != 0 ];then
+            if [ ${#arr[@]} -lt 3 ] || [ ${#arr[@]} -gt 4 ];then
+                echo "ERROR: STDIN must be either 3 or 4 fields long, I found "${#arr[@]}
+            else
+                if [ ${#arr[@]} == 4 ]; then
+                    xml="$xml<checkresult type='service' checktype='$checktype'>
+                    <servicename>${arr[1]}</servicename>
+                    <hostname>${arr[0]}</hostname>
+                    <state>${arr[2]}</state>
+                    <output>${arr[3]}</output>"
+                else
+                    xml="$xml<checkresult type='host' checktype='$checktype'>
+                    <hostname>${arr[0]}</hostname>
+                    <state>${arr[1]}</state>
+                    <output>${arr[2]}</output>"
+                fi
+
+                xml="$xml</checkresult>"
+                checkcount=$((checkcount+1))
+            fi
+        fi
+    done
+    # nothing to restore here: IFS was only overridden for the read builtin
+fi
+
+if [ "$file" ]; then
+    xml=`cat "$file"`
+    send_data "$xml"
+fi
+
+if [ "$directory" ]; then
+    #echo "Processing directory..."
+    for f in "$directory"/*
+    do
+      # an empty or missing directory leaves the glob unexpanded - skip non-files
+      [ -f "$f" ] || continue
+      xml=`cat "$f"`
+      send_data "$xml" "$f"
+    done
+fi
+
+if [ "x$file" == "x" ] && [ "x$directory" == "x" ]; then
+    xml="$xml</checkresults>"
+    send_data "$xml"
+    echo "Sent $checkcount checks to $url"
+fi
diff --git a/modules/private/system/eldiron.nix b/modules/private/system/eldiron.nix
index df40187..22de37e 100644
--- a/modules/private/system/eldiron.nix
+++ b/modules/private/system/eldiron.nix
@@ -24,6 +24,7 @@
     myServices.buildbot.enable = true;
     myServices.databases.enable = true;
     myServices.gitolite.enable = true;
+    myServices.monitoring.enable = true;
     myServices.irc.enable = true;
     myServices.pub.enable = true;
     myServices.tasks.enable = true;
diff --git a/pkgs/default.nix b/pkgs/default.nix
index ff9d477..4949573 100644
--- a/pkgs/default.nix
+++ b/pkgs/default.nix
@@ -42,6 +42,10 @@ rec {
   composerEnv = callPackage ./composer-env {};
   webapps = callPackage ./webapps { inherit mylibs composerEnv private; };
 
+  monitoring-plugins = callPackage ./monitoring-plugins {};
+  naemon = callPackage ./naemon { inherit mylibs monitoring-plugins; };
+  naemon-livestatus = callPackage ./naemon-livestatus { inherit mylibs naemon; };
+
   private = if builtins.pathExists (./. + "/private")
     then import ./private { inherit pkgs; }
     else { webapps = {}; };
diff --git a/pkgs/monitoring-plugins/default.nix b/pkgs/monitoring-plugins/default.nix
new file mode 100644
index 0000000..852d29b
--- /dev/null
+++ b/pkgs/monitoring-plugins/default.nix
@@ -0,0 +1,33 @@
+{ stdenv, iputils, fetchpatch, fetchurl, file, hostname, perl, openssl,
+  bind, openldap, procps-ng, postfix,
+  wrapperDir ? "/run/wrappers/bin"
+}:
+stdenv.mkDerivation rec {
+  pname = "monitoring-plugins";
+  version = "2.2";
+  name = "${pname}-${version}";
+
+  src = fetchurl {
+    url = "https://www.monitoring-plugins.org/download/${name}.tar.gz";
+    sha256 = "0r9nvnk64nv7k8w352n99lw4p92pycxd9wlga9zyzjx9027m6si9";
+  };
+
+  patches = [
+    (fetchpatch {
+      name = "mariadb.patch";
+      url = "https://git.archlinux.org/svntogit/community.git/plain/trunk/0001-mariadb.patch?h=packages/monitoring-plugins";
+      sha256 = "0jf6fqkyzag66rid92m7asnr2dp8rr8kn4zjvhqg0mqvf8imppky";
+    })
+  ];
+
+  # ping needs CAP_NET_RAW capability which is set only in the wrappers namespace
+  configurePhase = ''
+    ./configure --disable-static --disable-dependency-tracking \
+    --prefix=$out \
+    --with-ping-command="${wrapperDir}/ping -4 -n -U -w %d -c %d %s" \
+    --with-ping6-command="${wrapperDir}/ping -6 -n -U -w %d -c %d %s" \
+    --with-sudo-command="${wrapperDir}/sudo"
+  '';
+
+  buildInputs = [ perl file hostname iputils openssl openldap procps-ng bind.dnsutils postfix ];
+}
diff --git a/pkgs/naemon-livestatus/default.nix b/pkgs/naemon-livestatus/default.nix
new file mode 100644
index 0000000..46ef51a
--- /dev/null
+++ b/pkgs/naemon-livestatus/default.nix
@@ -0,0 +1,23 @@
+{ stdenv, mylibs, autoconf, automake,
+  libtool, pkg-config, naemon,
+  varDir ? "/var/lib/naemon",
+  etcDir ? "/etc/naemon"
+}:
+stdenv.mkDerivation (mylibs.fetchedGithub ./naemon-livestatus.json // {
+  preConfigure = ''
+    ./autogen.sh || true
+  '';
+
+  configureFlags = [
+    "--localstatedir=${varDir}"
+    "--sysconfdir=${etcDir}"
+  ];
+
+  preInstall = ''
+    substituteInPlace Makefile --replace \
+      '@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am' \
+      '@$(MAKE) $(AM_MAKEFLAGS) install-exec-am'
+  '';
+
+  buildInputs = [ autoconf automake libtool pkg-config naemon ];
+})
diff --git a/pkgs/naemon-livestatus/naemon-livestatus.json b/pkgs/naemon-livestatus/naemon-livestatus.json
new file mode 100644
index 0000000..c648d2b
--- /dev/null
+++ b/pkgs/naemon-livestatus/naemon-livestatus.json
@@ -0,0 +1,15 @@
+{
+  "tag": "33dbcfe-master",
+  "meta": {
+    "name": "naemon-livestatus",
+    "url": "https://github.com/naemon/naemon-livestatus",
+    "branch": "master"
+  },
+  "github": {
+    "owner": "naemon",
+    "repo": "naemon-livestatus",
+    "rev": "33dbcfe18e42158f25c27cff95a1e07b73be53b0",
+    "sha256": "16jk0c6pwr7ck0g6s12hj6czbhgdr7c7f74zzsp5279af86y8fd6",
+    "fetchSubmodules": true
+  }
+}
diff --git a/pkgs/naemon/default.nix b/pkgs/naemon/default.nix
new file mode 100644
index 0000000..080a226
--- /dev/null
+++ b/pkgs/naemon/default.nix
@@ -0,0 +1,34 @@
+{ stdenv, mylibs, help2man, monitoring-plugins, autoconf, automake,
+  libtool, glib, pkg-config, gperf,
+  varDir ? "/var/lib/naemon",
+  etcDir ? "/etc/naemon",
+  cacheDir ? "/var/cache/naemon",
+  logDir ? "/var/log/naemon",
+  runDir ? "/run/naemon",
+  user ? "naemon",
+  group ? "naemon"
+}:
+stdenv.mkDerivation (mylibs.fetchedGithub ./naemon.json // {
+  preConfigure = ''
+    ./autogen.sh || true
+  '';
+
+  configureFlags = [
+    "--localstatedir=${varDir}"
+    "--sysconfdir=${etcDir}"
+    "--with-pkgconfdir=${etcDir}"
+    "--with-pluginsdir=${monitoring-plugins}/libexec"
+    "--with-tempdir=${cacheDir}"
+    "--with-checkresultdir=${cacheDir}/checkresults"
+    "--with-logdir=${logDir}"
+    "--with-naemon-user=${user}"
+    "--with-naemon-group=${group}"
+    "--with-lockfile=${runDir}/naemon.pid"
+  ];
+
+  preInstall = ''
+    substituteInPlace Makefile --replace '$(MAKE) $(AM_MAKEFLAGS) install-exec-hook' ""
+  '';
+
+  buildInputs = [ autoconf automake help2man libtool glib pkg-config gperf ];
+})
diff --git a/pkgs/naemon/naemon.json b/pkgs/naemon/naemon.json
new file mode 100644
index 0000000..c68647f
--- /dev/null
+++ b/pkgs/naemon/naemon.json
@@ -0,0 +1,15 @@
+{
+  "tag": "ba6fd20-master",
+  "meta": {
+    "name": "naemon",
+    "url": "https://github.com/naemon/naemon-core",
+    "branch": "master"
+  },
+  "github": {
+    "owner": "naemon",
+    "repo": "naemon-core",
+    "rev": "ba6fd20221fbdd5c99b4eb5dcf4ee5681c5a9495",
+    "sha256": "15rvqg985nn05rsgkch4ix8y2wg7a6pb70d63ckzy1inwqjp8z46",
+    "fetchSubmodules": true
+  }
+}