From d8f933bd00a5cc416da00cd26c9d13f7a1c02486 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Bouya?= Date: Sun, 1 Jul 2018 15:35:43 +0200 Subject: Add monitoring --- .../lib/puppet/provider/package/pacman.rb | 283 ++++++ .../lib/puppet/provider/package/pip2.rb | 17 + .../manifests/package_managers.pp | 6 + modules/profile/files/monitoring/check_command | 113 +++ modules/profile/files/monitoring/check_md_raid | 32 + modules/profile/manifests/monitoring.pp | 51 + modules/profile/manifests/monitoring/commands.pp | 63 ++ modules/profile/manifests/monitoring/contacts.pp | 15 + modules/profile/manifests/monitoring/hosts.pp | 43 + modules/profile/manifests/monitoring/services.pp | 89 ++ modules/profile/manifests/monitoring/times.pp | 22 + .../profile/templates/monitoring/naemon.cfg.erb | 1038 ++++++++++++++++++++ .../profile/templates/monitoring/resource.cfg.erb | 30 + .../profile/templates/monitoring/send_nrdp.sh.erb | 271 +++++ modules/role/manifests/caldance.pp | 1 + 15 files changed, 2074 insertions(+) create mode 100644 modules/base_installation/lib/puppet/provider/package/pacman.rb create mode 100644 modules/base_installation/lib/puppet/provider/package/pip2.rb create mode 100644 modules/profile/files/monitoring/check_command create mode 100644 modules/profile/files/monitoring/check_md_raid create mode 100644 modules/profile/manifests/monitoring.pp create mode 100644 modules/profile/manifests/monitoring/commands.pp create mode 100644 modules/profile/manifests/monitoring/contacts.pp create mode 100644 modules/profile/manifests/monitoring/hosts.pp create mode 100644 modules/profile/manifests/monitoring/services.pp create mode 100644 modules/profile/manifests/monitoring/times.pp create mode 100644 modules/profile/templates/monitoring/naemon.cfg.erb create mode 100644 modules/profile/templates/monitoring/resource.cfg.erb create mode 100755 modules/profile/templates/monitoring/send_nrdp.sh.erb (limited to 'modules') diff --git 
a/modules/base_installation/lib/puppet/provider/package/pacman.rb b/modules/base_installation/lib/puppet/provider/package/pacman.rb new file mode 100644 index 0000000..0a5e5d0 --- /dev/null +++ b/modules/base_installation/lib/puppet/provider/package/pacman.rb @@ -0,0 +1,283 @@ +require 'puppet/provider/package' +require 'set' +require 'uri' + +Puppet::Type.type(:package).provide :pacman, :parent => Puppet::Provider::Package do + desc "Support for the Package Manager Utility (pacman) used in Archlinux. + + This provider supports the `install_options` attribute, which allows command-line flags to be passed to pacman. + These options should be specified as a string (e.g. '--flag'), a hash (e.g. {'--flag' => 'value'}), + or an array where each element is either a string or a hash." + + # If aura is installed, we can make use of it + def self.aura? + @aura ||= Puppet::FileSystem.exist?('/usr/bin/aura') + end + + commands :pacman => "/usr/bin/pacman" + # Aura is a common AUR helper which, if installed, we can use to query the AUR + commands :aura => "/usr/bin/aura" if aura? + + confine :operatingsystem => [:archlinux, :manjarolinux] + defaultfor :operatingsystem => [:archlinux, :manjarolinux] + has_feature :install_options + has_feature :uninstall_options + has_feature :upgradeable + has_feature :virtual_packages + + # Checks if a given name is a group + def self.group?(name) + begin + !pacman("-Sg", name).empty? + rescue Puppet::ExecutionFailure + # pacman returns an expected non-zero exit code when the name is not a group + false + end + end + + # Install a package using 'pacman', or 'aura' if available. + # Installs quietly, without confirmation or progress bar, updates package + # list from servers defined in pacman.conf. 
+ def install + if @resource[:source] + install_from_file + else + install_from_repo + end + + unless self.query + fail(_("Could not find package '%{name}'") % { name: @resource[:name] }) + end + end + + # Fetch the list of packages and package groups that are currently installed on the system. + # Only package groups that are fully installed are included. If a group adds packages over time, it will not + # be considered as fully installed any more, and we would install the new packages on the next run. + # If a group removes packages over time, nothing will happen. This is intended. + def self.instances + instances = [] + + # Get the installed packages + installed_packages = get_installed_packages + installed_packages.sort_by { |k, _| k }.each do |package, version| + instances << new(to_resource_hash(package, version)) + end + + # Get the installed groups + get_installed_groups(installed_packages).each do |group, version| + instances << new(to_resource_hash(group, version)) + end + + instances + end + + # returns a hash package => version of installed packages + def self.get_installed_packages + begin + packages = {} + execpipe([command(:pacman), "-Q"]) do |pipe| + # pacman -Q output is 'packagename version-rel' + regex = %r{^(\S+)\s(\S+)} + pipe.each_line do |line| + if match = regex.match(line) + packages[match.captures[0]] = match.captures[1] + else + warning(_("Failed to match line '%{line}'") % { line: line }) + end + end + end + packages + rescue Puppet::ExecutionFailure + fail(_("Error getting installed packages")) + end + end + + # returns a hash of group => version of installed groups + def self.get_installed_groups(installed_packages, filter = nil) + groups = {} + begin + # Build a hash of group name => list of packages + command = [command(:pacman), "-Sgg"] + command << filter if filter + execpipe(command) do |pipe| + pipe.each_line do |line| + name, package = line.split + packages = (groups[name] ||= []) + packages << package + end + end + + # Remove 
any group that doesn't have all its packages installed + groups.delete_if do |_, packages| + !packages.all? { |package| installed_packages[package] } + end + + # Replace the list of packages with a version string consisting of packages that make up the group + groups.each do |name, packages| + groups[name] = packages.sort.map {|package| "#{package} #{installed_packages[package]}"}.join ', ' + end + rescue Puppet::ExecutionFailure + # pacman returns an expected non-zero exit code when the filter name is not a group + raise unless filter + end + groups + end + + # Because Archlinux is a rolling release based distro, installing a package + # should always result in the newest release. + def update + # Install in pacman can be used for update, too + self.install + end + + # We rescue the main check from Pacman with a check on the AUR using aura, if installed + def latest + # Synchronize the database + pacman "-Sy" + + resource_name = @resource[:name] + + # If target is a group, construct the group version + return pacman("-Sp", "--print-format", "%n %v", resource_name).lines.map{ |line| line.chomp }.sort.join(', ') if self.class.group?(resource_name) + + # Start by querying with pacman first + # If that fails, retry using aura against the AUR + pacman_check = true + begin + if pacman_check + output = pacman "-Sp", "--print-format", "%v", resource_name + return output.chomp + else + output = aura "-Ai", resource_name + output.split("\n").each do |line| + return line.split[2].chomp if line.split[0] =~ /Version/ + end + end + rescue Puppet::ExecutionFailure + if pacman_check and self.class.aura? 
+ pacman_check = false # now try the AUR + retry + else + raise + end + end + end + + # Queries information for a package or package group + def query + installed_packages = self.class.get_installed_packages + resource_name = @resource[:name] + + # Check for the resource being a group + version = self.class.get_installed_groups(installed_packages, resource_name)[resource_name] + + if version + unless @resource.allow_virtual? + warning(_("%{resource_name} is a group, but allow_virtual is false.") % { resource_name: resource_name }) + return nil + end + else + version = installed_packages[resource_name] + end + + # Return nil if no package or group found + return nil unless version + + self.class.to_resource_hash(resource_name, version) + end + + def self.to_resource_hash(name, version) + { + :name => name, + :ensure => version, + :provider => self.name + } + end + + # Removes a package from the system. + def uninstall + resource_name = @resource[:name] + + is_group = self.class.group?(resource_name) + + fail(_("Refusing to uninstall package group %{resource_name}, because allow_virtual is false.") % { resource_name: resource_name }) if is_group && !@resource.allow_virtual? + + cmd = %w{--noconfirm --noprogressbar} + cmd += uninstall_options if @resource[:uninstall_options] + cmd << "-R" + cmd << '-s' if is_group + cmd << resource_name + + if self.class.aura? + aura(*cmd) + else + pacman(*cmd) + end + end + + private + + def install_with_aura? + resource_name = @resource[:name] + if !self.class.aura? 
+ return false + end + + begin + pacman "-Sp", resource_name + return false + rescue Puppet::ExecutionFailure + return true + end + end + + def install_options + join_options(@resource[:install_options]) + end + + def uninstall_options + join_options(@resource[:uninstall_options]) + end + + def install_from_file + source = @resource[:source] + begin + source_uri = URI.parse source + rescue => detail + self.fail Puppet::Error, _("Invalid source '%{source}': %{detail}") % { source: source, detail: detail }, detail + end + + source = case source_uri.scheme + when nil then source + when /https?/i then source + when /ftp/i then source + when /file/i then source_uri.path + when /puppet/i + fail _("puppet:// URL is not supported by pacman") + else + fail _("Source %{source} is not supported by pacman") % { source: source } + end + pacman "--noconfirm", "--noprogressbar", "-Sy" + pacman "--noconfirm", "--noprogressbar", "-U", source + end + + def install_from_repo + resource_name = @resource[:name] + + # Refuse to install if not allowing virtual packages and the resource is a group + fail(_("Refusing to install package group %{resource_name}, because allow_virtual is false.") % { resource_name: resource_name }) if self.class.group?(resource_name) && !@resource.allow_virtual? + + cmd = %w{--noconfirm --needed} + cmd += install_options if @resource[:install_options] + + if install_with_aura? + cmd << "-Aq" << resource_name + aura(*cmd) + else + cmd << "--noprogressbar" + cmd << "-Sy" << resource_name + pacman(*cmd) + end + end + +end diff --git a/modules/base_installation/lib/puppet/provider/package/pip2.rb b/modules/base_installation/lib/puppet/provider/package/pip2.rb new file mode 100644 index 0000000..27cc0c4 --- /dev/null +++ b/modules/base_installation/lib/puppet/provider/package/pip2.rb @@ -0,0 +1,17 @@ +require 'puppet/provider/package/pip' + +Puppet::Type.type(:package).provide :pip2, + :parent => :pip do + + desc "Python packages via `pip2`. 
+ + This provider supports the `install_options` attribute, which allows command-line flags to be passed to pip2. + These options should be specified as a string (e.g. '--flag'), a hash (e.g. {'--flag' => 'value'}), + or an array where each element is either a string or a hash." + + has_feature :installable, :uninstallable, :upgradeable, :versionable, :install_options + + def self.cmd + ["pip2"] + end +end diff --git a/modules/base_installation/manifests/package_managers.pp b/modules/base_installation/manifests/package_managers.pp index c5c8485..a03085d 100644 --- a/modules/base_installation/manifests/package_managers.pp +++ b/modules/base_installation/manifests/package_managers.pp @@ -18,6 +18,12 @@ class base_installation::package_managers inherits base_installation { include => '/etc/pacman.d/mirrorlist' } + pacman::repo { 'immae': + order => 0, + server => 'https://git.immae.eu/releases/packages/', + siglevel => 'Optional' + } + class { 'aur': } contain "pacman" diff --git a/modules/profile/files/monitoring/check_command b/modules/profile/files/monitoring/check_command new file mode 100644 index 0000000..2c7eded --- /dev/null +++ b/modules/profile/files/monitoring/check_command @@ -0,0 +1,113 @@ +#!/usr/bin/perl + +use strict; +use Getopt::Std; +$| = 1; + +my %opts; +getopts('hr:C:c:s:o:', \%opts); + +my $STATE_OK = 0; +my $STATE_WARNING = 1; +my $STATE_CRITICAL = 2; +my $STATE_UNKNOWN = 3; + +if ($opts{'h'} || scalar(%opts) == 0) { + &print_help(); + exit($STATE_OK); +} + +my $command = $opts{'c'}; +if ($command eq '') { + print "You must provide a command to check.\n"; + exit($STATE_UNKNOWN); +} + +my $expected_output = $opts{'o'}; +my $expected_status = $opts{'s'}; +my $other_command = $opts{'C'}; + +if ($other_command eq '' and $expected_status eq '' and $expected_output eq '') { + $expected_status = 0; +} + +my $cmd = $command . ' 2>&1'; +my $other_cmd; +if ($other_command ne '') { + $other_cmd = $other_command . 
' 2>&1'; +} + +my $run_as; +if ($opts{'r'}) { + $run_as = $opts{'r'}; + $cmd = "sudo -u $run_as -n $cmd"; + + if ($other_command ne '') { + $other_cmd = "sudo -u $run_as -n $other_cmd"; + } + +} + +my $cmd_result = `$cmd`; +my $other_cmd_result; +if ($other_command ne '') { + $other_cmd_result = `$other_cmd`; + chomp($other_cmd_result); +} + +chomp($cmd_result); +if ($cmd_result =~ /sudo/i) { + print "$command CRITICAL - No sudo right to run the command\n"; + exit($STATE_UNKNOWN); +} elsif ($expected_status ne '') { + if ($? != $expected_status) { + print "$command CRITICAL - Response status $?\n"; + exit($STATE_CRITICAL); + } else { + print "$command OK - Response status $?\n"; + exit($STATE_OK); + } +} elsif ($other_command ne '') { + if ($cmd_result ne $other_cmd_result) { + print "$command CRITICAL - Expected output not matching other command output\n"; + exit($STATE_CRITICAL); + } else { + print "$command OK - Expected output matching other command output\n"; + exit($STATE_OK); + } +} else { + if ($cmd_result !~ /$expected_output/) { + print "$command CRITICAL - Expected output not matching\n"; + exit($STATE_CRITICAL); + } else { + print "$command OK - Expected output matching\n"; + exit($STATE_OK); + } +} + +sub print_help() { + print << "EOF"; +Check whether the given command responds as expected. One of -o -C or -s must be selected. + +Options: +-h + Print detailed help screen + +-c + command to run (required) + +-C + other command to compare output + +-r user + Run as user via sudo. 
+ +-s + status code to check + +-o + output to check + +EOF +} + diff --git a/modules/profile/files/monitoring/check_md_raid b/modules/profile/files/monitoring/check_md_raid new file mode 100644 index 0000000..9c79a7a --- /dev/null +++ b/modules/profile/files/monitoring/check_md_raid @@ -0,0 +1,32 @@ +#!/bin/bash +# +# Created by Sebastian Grewe, Jammicron Technology +# + +# Get count of raid arrays +RAID_DEVICES=`grep ^md -c /proc/mdstat` + +# Get count of degraded arrays +RAID_STATUS=`grep "\[.*_.*\]" /proc/mdstat -c` + +# Is an array currently recovering, get percentage of recovery +RAID_RECOVER=`grep recovery /proc/mdstat | awk '{print $4}'` + +# Check raid status +# RAID recovers --> Warning +if [[ $RAID_RECOVER ]]; then + STATUS="WARNING - Checked $RAID_DEVICES arrays, recovering : $RAID_RECOVER" + EXIT=1 +# RAID ok +elif [[ $RAID_STATUS == "0" ]]; then + STATUS="OK - Checked $RAID_DEVICES arrays." + EXIT=0 +# All else critical, better safe than sorry +else + STATUS="CRITICAL - Checked $RAID_DEVICES arrays, $RAID_STATUS have FAILED" + EXIT=2 +fi + +# Status and quit +echo $STATUS +exit $EXIT diff --git a/modules/profile/manifests/monitoring.pp b/modules/profile/manifests/monitoring.pp new file mode 100644 index 0000000..beabe70 --- /dev/null +++ b/modules/profile/manifests/monitoring.pp @@ -0,0 +1,51 @@ +class profile::monitoring ( + Optional[String] $naemon_url = undef, + Optional[String] $naemon_token = undef, +) { + ensure_packages(["naemon", "cnagios"]) + + file { "/etc/naemon": + ensure => "directory", + recurse => true, + purge => true, + force => true, + require => Package["naemon"], + } + -> + file { "/etc/naemon/resource.cfg": + ensure => "file", + owner => "naemon", + group => "naemon", + mode => "0600", + content => template("profile/monitoring/resource.cfg.erb"), + } + -> + file { "/etc/naemon/naemon.cfg": + ensure => "file", + owner => "naemon", + group => "naemon", + mode => "0644", + content => template("profile/monitoring/naemon.cfg.erb"), + } 
+ -> + service { "naemon": + ensure => "running", + enable => true, + } + + unless ($naemon_url == undef or empty($naemon_url)) { + file { "/etc/naemon/send_nrdp.sh": + ensure => "file", + owner => "naemon", + group => "naemon", + mode => "0700", + content => template("profile/monitoring/send_nrdp.sh.erb"), + } + } + + include "profile::monitoring::hosts" + include "profile::monitoring::services" + include "profile::monitoring::commands" + include "profile::monitoring::times" + include "profile::monitoring::contacts" +} diff --git a/modules/profile/manifests/monitoring/commands.pp b/modules/profile/manifests/monitoring/commands.pp new file mode 100644 index 0000000..7e9683f --- /dev/null +++ b/modules/profile/manifests/monitoring/commands.pp @@ -0,0 +1,63 @@ +class profile::monitoring::commands inherits profile::monitoring { + ensure_packages(["monitoring-plugins"]) + + file { "/etc/naemon/monitoring-plugins": + ensure => "directory", + owner => "naemon", + group => "naemon", + mode => "0700", + } + + file { "/etc/naemon/monitoring-plugins/check_command": + ensure => "present", + owner => "naemon", + group => "naemon", + mode => "0700", + source => "puppet:///modules/profile/monitoring/check_command", + } + + file { "/etc/naemon/monitoring-plugins/check_md_raid": + ensure => "present", + owner => "naemon", + group => "naemon", + mode => "0700", + source => "puppet:///modules/profile/monitoring/check_md_raid", + } + + Nagios_command { + ensure => "present", + owner => "naemon", + group => "naemon", + target => "/etc/naemon/objects.cfg", + notify => Service["naemon"], + before => Service["naemon"], + } + + nagios_command { + "check-host-alive": + command_line => '$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5'; + "check_local_disk": + command_line => '$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$'; + "check_local_procs": + command_line => '$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$'; + "check_local_load": + command_line => 
'$USER1$/check_load -w $ARG1$ -c $ARG2$'; + "check_local_swap": + command_line => '$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$'; + "check_ntp": + command_line => '$USER1$/check_ntp_time -H 0.arch.pool.ntp.org'; + "check_md_raid": + command_line => '$USER2$/check_md_raid'; + "check_command_output": + command_line => '$USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$', + require => File["/etc/naemon/monitoring-plugins/check_command"]; + } + + unless empty($naemon_url) { + nagios_command { + "notify-master": + command_line => '/etc/naemon/send_nrdp.sh -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$"', + require => File["/etc/naemon/send_nrdp.sh"]; + } + } +} diff --git a/modules/profile/manifests/monitoring/contacts.pp b/modules/profile/manifests/monitoring/contacts.pp new file mode 100644 index 0000000..284d51c --- /dev/null +++ b/modules/profile/manifests/monitoring/contacts.pp @@ -0,0 +1,15 @@ +class profile::monitoring::contacts { + Nagios_contactgroup { + ensure => "present", + owner => "naemon", + group => "naemon", + target => "/etc/naemon/objects.cfg", + notify => Service["naemon"], + before => Service["naemon"], + } + + nagios_contactgroup { "admins": + alias => "System administrators", + } + +} diff --git a/modules/profile/manifests/monitoring/hosts.pp b/modules/profile/manifests/monitoring/hosts.pp new file mode 100644 index 0000000..306fe3e --- /dev/null +++ b/modules/profile/manifests/monitoring/hosts.pp @@ -0,0 +1,43 @@ +class profile::monitoring::hosts { + $real_hostname = lookup("base_installation::real_hostname") + + Nagios_hostgroup { + ensure => "present", + owner => "naemon", + group => "naemon", + target => "/etc/naemon/objects.cfg", + notify => Service["naemon"], + before => Service["naemon"], + } + + Nagios_host { + ensure => "present", + owner => "naemon", + group => "naemon", + target => "/etc/naemon/objects.cfg", + notify => Service["naemon"], + before => Service["naemon"], + } + + nagios_hostgroup { 
"linux-servers": + alias => "Linux Servers", + members => [$real_hostname], + } + + $host_linux_server = { + check_command => "check-host-alive", + check_interval => 5, + check_period => "24x7", + contact_groups => "admins", + max_check_attempts => "10", + notification_interval => "120", + notification_options => "d,u,r", + retry_interval => "1", + } + + nagios_host { $real_hostname: + address => $real_hostname; + default: * => $host_linux_server, + } + +} diff --git a/modules/profile/manifests/monitoring/services.pp b/modules/profile/manifests/monitoring/services.pp new file mode 100644 index 0000000..39c2def --- /dev/null +++ b/modules/profile/manifests/monitoring/services.pp @@ -0,0 +1,89 @@ +class profile::monitoring::services { + $real_hostname = lookup("base_installation::real_hostname") + + Nagios_service { + ensure => "present", + owner => "naemon", + group => "naemon", + target => "/etc/naemon/objects.cfg", + notify => Service["naemon"], + before => Service["naemon"], + } + + $service_generic = { + active_checks_enabled => "1", + check_freshness => "0", + check_interval => "10", + check_period => "24x7", + contact_groups => "admins", + event_handler_enabled => "1", + flap_detection_enabled => "1", + is_volatile => "0", + max_check_attempts => "3", + notification_interval => "60", + notification_options => "w,u,c,r", + notification_period => "24x7", + notifications_enabled => "1", + obsess_over_service => "1", + passive_checks_enabled => "1", + process_perf_data => "1", + retain_nonstatus_information => "1", + retain_status_information => "1", + retry_interval => "2", + } + + + $service_local = merge($service_generic, { + host_name => $real_hostname, + check_interval => "5", + max_check_attempts => "4", + retry_interval => "1", + }) + + sudo::conf { + default: + sudo_file_name => "naemon"; + 'naemon-fail2ban': + content => "naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping"; + } + + nagios_service { + default: * => $service_local; + "Size on root 
partition": + service_description => "Size on root partition", + check_command => "check_local_disk!20%!10%!/"; + "Total number of process": + service_description => "Total number of process", + check_command => "check_local_procs!250!400!RSZDT"; + "Average load": + service_description => "Average load", + check_command => "check_local_load!8.0,8.0,8.0!10.0,10.0,10.0"; + "Swap usage": + service_description => "Swap usage", + check_command => "check_local_swap!20!10"; + "fail2ban is active": + service_description => "fail2ban is active", + check_command => "check_command_output!fail2ban-client ping!pong!-r root", + require => Sudo::Conf["naemon-fail2ban"]; + "NTP is activated and working": + service_description => "NTP is activated and working", + check_command => "check_ntp"; + } + + if empty(find_file("/proc/mdstat")) { + nagios_service { + default: * => $service_local; + "No mdadm array is degraded": + ensure => "absent", + service_description => "No mdadm array is degraded", + check_command => "check_md_raid"; + } + } else { + nagios_service { + default: * => $service_local; + "No mdadm array is degraded": + service_description => "No mdadm array is degraded", + check_command => "check_md_raid"; + } + } +} diff --git a/modules/profile/manifests/monitoring/times.pp b/modules/profile/manifests/monitoring/times.pp new file mode 100644 index 0000000..fb61acc --- /dev/null +++ b/modules/profile/manifests/monitoring/times.pp @@ -0,0 +1,22 @@ +class profile::monitoring::times { + Nagios_timeperiod { + ensure => "present", + owner => "naemon", + group => "naemon", + target => "/etc/naemon/objects.cfg", + notify => Service["naemon"], + before => Service["naemon"], + } + + nagios_timeperiod { "24x7": + alias => "24 Hours A Day, 7 Days A Week", + monday => "00:00-24:00", + tuesday => "00:00-24:00", + wednesday => "00:00-24:00", + thursday => "00:00-24:00", + friday => "00:00-24:00", + saturday => "00:00-24:00", + sunday => "00:00-24:00", + } + +} diff --git 
a/modules/profile/templates/monitoring/naemon.cfg.erb b/modules/profile/templates/monitoring/naemon.cfg.erb new file mode 100644 index 0000000..78bada3 --- /dev/null +++ b/modules/profile/templates/monitoring/naemon.cfg.erb @@ -0,0 +1,1038 @@ +############################################################################## +# +# naemon.cfg - Sample Main Config File for Naemon 1.0.7 +# +# Read the documentation for more information on this configuration +# file. I've provided some comments here, but things may not be so +# clear without further explanation. +# +# +############################################################################## + + +# LOG FILE +# This is the main log file where service and host events are logged +# for historical purposes. This should be the first option specified +# in the config file!!! + +log_file=/var/log/naemon/naemon.log + + + +# OBJECT CONFIGURATION FILE(S) +# These are the object configuration files in which you define hosts, +# host groups, contacts, contact groups, services, etc. +# You can split your object definitions across several config files +# if you wish (as shown below), or keep them all in a single config file. + +# You can specify individual object config files as shown below: +cfg_file=/etc/naemon/objects.cfg +#cfg_file=/etc/naemon/objects/commands.cfg +#cfg_file=/etc/naemon/objects/contacts.cfg +#cfg_file=/etc/naemon/objects/timeperiods.cfg +#cfg_file=/etc/naemon/objects/templates.cfg + + +# You can also tell naemon to process all config files (with a .cfg +# extension) in a particular directory by using the cfg_dir +# directive as shown below: +#cfg_dir=/etc/naemon/conf.d + + + + +# OBJECT CACHE FILE +# This option determines where object definitions are cached when +# naemon starts/restarts. 
The CGIs read object definitions from +# this cache file (rather than looking at the object config files +# directly) in order to prevent inconsistencies that can occur +# when the config files are modified after naemon starts. + +object_cache_file=/var/lib/naemon/objects.cache + + + +# PRE-CACHED OBJECT FILE +# This option determines the location of the precached object file. +# If you run naemon with the -p command line option, it will preprocess +# your object configuration file(s) and write the cached config to this +# file. You can then start naemon with the -u option to have it read +# object definitions from this precached file, rather than the standard +# object configuration files (see the cfg_file and cfg_dir options above). +# Using a precached object file can speed up the time needed to (re)start +# the naemon process if you've got a large and/or complex configuration. +# Read the documentation section on optimizing naemon to find out more +# about how this feature works. + +precached_object_file=/var/lib/naemon/objects.precache + + + +# RESOURCE FILE +# This is an optional resource file that contains $USERx$ macro +# definitions. Multiple resource files can be specified by using +# multiple resource_file definitions. The CGIs will not attempt to +# read the contents of resource files, so information that is +# considered to be sensitive (usernames, passwords, etc) can be +# defined as macros in this file and restrictive permissions (600) +# can be placed on this file. + +resource_file=/etc/naemon/resource.cfg + + + +# STATUS FILE +# This is where the current status of all monitored services and +# hosts is stored. Its contents are read and processed by the CGIs. +# The contents of the status file are deleted every time naemon +# restarts. + +status_file=/var/lib/naemon/status.dat + + + +# STATUS FILE UPDATE INTERVAL +# This option determines the frequency (in seconds) that +# naemon will periodically dump program, host, and +# service status data. 
Set it to 0 to disable updates. + +status_update_interval=10 + + + +# EXTERNAL COMMAND OPTION +# This option allows you to specify whether or not Naemon should check +# for external commands (in the command file defined below). By default +# Naemon will *not* check for external commands, just to be on the +# cautious side. If you want to be able to use the CGI command interface +# you will have to enable this. +# Values: 0 = disable commands, 1 = enable commands + +check_external_commands=1 + + + +# EXTERNAL COMMAND FILE +# This is the file that Naemon checks for external command requests. +# It is also where the command CGI will write commands that are submitted +# by users, so it must be writeable by the user that the web server +# is running as (usually 'nobody'). Permissions should be set at the +# directory level instead of on the file, as the file is deleted every +# time its contents are processed. + +command_file=/var/lib/naemon/naemon.cmd + + + +# QUERY HANDLER INTERFACE +# This is the socket that is created for the Query Handler interface + +#query_socket=/var/lib/naemon/naemon.qh + + + +# LOCK FILE +# This is the lockfile that Naemon will use to store its PID number +# in when it is running in daemon mode. + +lock_file=/run/naemon/naemon.pid + + + +# TEMP FILE +# This is a temporary file that is used as scratch space when Naemon +# updates the status log, cleans the comment file, etc. This file +# is created, used, and deleted throughout the time that Naemon is +# running. + +temp_file=/var/lib/naemon/naemon.tmp + + + +# TEMP PATH +# This is the path where Naemon can create temp files for service and +# host check results, etc. + +temp_path=/var/cache/naemon + + + +# EVENT BROKER OPTIONS +# Controls what (if any) data gets sent to the event broker. 
+# Values: 0 = Broker nothing +# -1 = Broker everything +# <other> = See documentation + +event_broker_options=-1 + + + +# EVENT BROKER MODULE(S) +# This directive is used to specify an event broker module that should +# be loaded by Naemon at startup. Use multiple directives if you want +# to load more than one module. Arguments that should be passed to +# the module at startup are separated from the module path by a space. +# +# Example: +# +# broker_module=<modulepath> [moduleargs] + +#broker_module=/usr/lib/naemon/naemon-livestatus/livestatus.so /var/cache/naemon/live +#broker_module=/somewhere/module1.o +#broker_module=/somewhere/module2.o arg1 arg2=3 debug=0 + +# In order to provide drop-in support for new modules, you can also make use of +# the include_dir directive. The include_dir directive causes Naemon to parse +# any configuration (not just object configuration, as with cfg_dir) as if the +# contents of the files in the pointed-to directory were included on this line. +# The path to the directory is relative to the path of the main naemon.cfg +# file. +# include_dir=module-conf.d + +# LOG ARCHIVE PATH +# This is the directory where archived (rotated) log files are placed by the +# logrotate daemon. It is used by out of core add-ons to discover the logfiles. + +log_archive_path=/var/log/naemon/archives + + + +# LOGGING OPTIONS +# If you want messages logged to the syslog facility, as well as the +# Naemon log file set this option to 1. If not, set it to 0. + +use_syslog=1 + +# NOTIFICATION LOGGING OPTION +# If you don't want notifications to be logged, set this value to 0. +# If notifications should be logged, set the value to 1. + +log_notifications=1 + +# Notification suppression reason (NSR) logging causes the reason for a +# notification suppression to be logged, when it occurs. This can potentially +# add some noise to your log file, but is highly useful when troubleshooting +# missing notifications. 
+ +enable_notification_suppression_reason_logging=1 + + +# SERVICE RETRY LOGGING OPTION +# If you don't want service check retries to be logged, set this value +# to 0. If retries should be logged, set the value to 1. + +log_service_retries=1 + + + +# HOST RETRY LOGGING OPTION +# If you don't want host check retries to be logged, set this value to +# 0. If retries should be logged, set the value to 1. + +log_host_retries=1 + + + +# EVENT HANDLER LOGGING OPTION +# If you don't want host and service event handlers to be logged, set +# this value to 0. If event handlers should be logged, set the value +# to 1. + +log_event_handlers=1 + + + +# INITIAL STATES LOGGING OPTION +# If you want Naemon to log all initial host and service states to +# the main log file (the first time the service or host is checked) +# you can enable this option by setting this value to 1. If you +# are not using an external application that does long term state +# statistics reporting, you do not need to enable this option. In +# this case, set the value to 0. + +log_initial_states=1 + + + +# CURRENT STATES LOGGING OPTION +# If you don't want Naemon to log all current host and service states +# after log has been rotated to the main log file, you can disable this +# option by setting this value to 0. Default value is 1. + +log_current_states=1 + + + +# EXTERNAL COMMANDS LOGGING OPTION +# If you don't want Naemon to log external commands, set this value +# to 0. If external commands should be logged, set this value to 1. +# Note: This option does not include logging of passive service +# checks - see the option below for controlling whether or not +# passive checks are logged. + +log_external_commands=1 + + + +# PASSIVE CHECKS LOGGING OPTION +# If you don't want Naemon to log passive host and service checks, set +# this value to 0. If passive checks should be logged, set +# this value to 1. 
+ +log_passive_checks=1 + + + +# GLOBAL HOST AND SERVICE EVENT HANDLERS +# These options allow you to specify a host and service event handler +# command that is to be run for every host or service state change. +# The global event handler is executed immediately prior to the event +# handler that you have optionally specified in each host or +# service definition. The command argument is the short name of a +# command definition that you define in your host configuration file. +# Read the HTML docs for more information. + +#global_host_event_handler=somecommand +#global_service_event_handler=somecommand + + + +# MAXIMUM CONCURRENT SERVICE CHECKS +# This option allows you to specify the maximum number of +# service checks that can be run in parallel at any given time. +# Specifying a value of 1 for this variable essentially prevents +# any service checks from being parallelized. A value of 0 +# will not restrict the number of concurrent checks that are +# being executed. + +max_concurrent_checks=0 + + +# CHECK RESULT PATH +# This is the directory where Naemon reads check results of host and +# service checks to further process them. +# +# Note: Naemon does not require this folder internally but it still +# can be used to pass check results to Naemon. + +check_result_path=/var/cache/naemon/checkresults + + +# CACHED HOST CHECK HORIZON +# This option determines the maximum amount of time (in seconds) +# that the state of a previous host check is considered current. +# Cached host states (from host checks that were performed more +# recently than the timeframe specified by this value) can immensely +# improve performance in regards to the host check logic. +# Too high of a value for this option may result in inaccurate host +# states being used by Naemon, while a lower value may result in a +# performance hit for host checks. Use a value of 0 to disable host +# check caching.
+ +cached_host_check_horizon=15 + + + +# CACHED SERVICE CHECK HORIZON +# This option determines the maximum amount of time (in seconds) +# that the state of a previous service check is considered current. +# Cached service states (from service checks that were performed more +# recently than the timeframe specified by this value) can immensely +# improve performance in regards to predictive dependency checks. +# Use a value of 0 to disable service check caching. + +cached_service_check_horizon=15 + + + +# ENABLE PREDICTIVE HOST DEPENDENCY CHECKS +# This option determines whether or not Naemon will attempt to execute +# checks of hosts when it predicts that future dependency logic test +# may be needed. These predictive checks can help ensure that your +# host dependency logic works well. +# Values: +# 0 = Disable predictive checks +# 1 = Enable predictive checks (default) + +enable_predictive_host_dependency_checks=1 + + + +# ENABLE PREDICTIVE SERVICE DEPENDENCY CHECKS +# This option determines whether or not Naemon will attempt to execute +# checks of services when it predicts that future dependency logic test +# may be needed. These predictive checks can help ensure that your +# service dependency logic works well. +# Values: +# 0 = Disable predictive checks +# 1 = Enable predictive checks (default) + +enable_predictive_service_dependency_checks=1 + + + +# SOFT STATE DEPENDENCIES +# This option determines whether or not Naemon will use soft state +# information when checking host and service dependencies. Normally +# Naemon will only use the latest hard host or service state when +# checking dependencies. If you want it to use the latest state (regardless +# of whether it's a soft or hard state type), enable this option.
+# Values: +# 0 = Don't use soft state dependencies (default) +# 1 = Use soft state dependencies + +soft_state_dependencies=0 + + + +# TIME CHANGE ADJUSTMENT THRESHOLDS +# These options determine when Naemon will react to detected changes +# in system time (either forward or backwards). + +#time_change_threshold=900 + + + +# TIMEOUT VALUES +# These options control how much time Naemon will allow various +# types of commands to execute before killing them off. Options +# are available for controlling maximum time allotted for +# service checks, host checks, event handlers, notifications, the +# ocsp command, and performance data commands. All values are in +# seconds. + +service_check_timeout=75 +host_check_timeout=30 +event_handler_timeout=30 +notification_timeout=30 +ocsp_timeout=5 +perfdata_timeout=5 + + + +# RETAIN STATE INFORMATION +# This setting determines whether or not Naemon will save state +# information for services and hosts before it shuts down. Upon +# startup Naemon will reload all saved service and host state +# information before starting to monitor. This is useful for +# maintaining long-term data on state statistics, etc, but will +# slow Naemon down a bit when it (re)starts. Since its only +# a one-time penalty, I think its well worth the additional +# startup delay. + +retain_state_information=1 + + + +# STATE RETENTION FILE +# This is the file that Naemon should use to store host and +# service state information before it shuts down. The state +# information in this file is also read immediately prior to +# starting to monitor the network when Naemon is restarted. +# This file is used only if the retain_state_information +# variable is set to 1. + +state_retention_file=/var/lib/naemon/retention.dat + + + +# RETENTION DATA UPDATE INTERVAL +# This setting determines how often (in minutes) that Naemon +# will automatically save retention data during normal operation. 
+# If you set this value to 0, Naemon will not save retention +# data at regular intervals, but it will still save retention +# data before shutting down or restarting. If you have disabled +# state retention, this option has no effect. + +retention_update_interval=60 + + + +# USE RETAINED PROGRAM STATE +# This setting determines whether or not Naemon will set +# program status variables based on the values saved in the +# retention file. If you want to use retained program status +# information, set this value to 1. If not, set this value +# to 0. + +use_retained_program_state=1 + + + +# USE RETAINED SCHEDULING INFO +# This setting determines whether or not Naemon will retain +# the scheduling info (next check time) for hosts and services +# based on the values saved in the retention file. +# If you want to use retained scheduling info, set this +# value to 1. If not, set this value to 0. + +use_retained_scheduling_info=1 + + + +# RETAINED ATTRIBUTE MASKS (ADVANCED FEATURE) +# The following variables are used to specify specific host and +# service attributes that should *not* be retained by Naemon during +# program restarts. +# +# The values of the masks are bitwise ORs of values specified +# by the "MODATTR_" definitions found in include/common.h. +# For example, if you do not want the current enabled/disabled state +# of flap detection and event handlers for hosts to be retained, you +# would use a value of 24 for the host attribute mask... +# MODATTR_EVENT_HANDLER_ENABLED (8) + MODATTR_FLAP_DETECTION_ENABLED (16) = 24 + +# This mask determines what host attributes are not retained +retained_host_attribute_mask=0 + +# This mask determines what service attributes are not retained +retained_service_attribute_mask=0 + +# These two masks determine what process attributes are not retained. +# There are two masks, because some process attributes have host and service +# options.
For example, you can disable active host checks, but leave active +# service checks enabled. +retained_process_host_attribute_mask=0 +retained_process_service_attribute_mask=0 + +# These two masks determine what contact attributes are not retained. +# There are two masks, because some contact attributes have host and +# service options. For example, you can disable host notifications for +# a contact, but leave service notifications enabled for them. +retained_contact_host_attribute_mask=0 +retained_contact_service_attribute_mask=0 + + + +# INTERVAL LENGTH +# This is the seconds per unit interval as used in the +# host/contact/service configuration files. Setting this to 60 means +# that each interval is one minute long (60 seconds). Other settings +# have not been tested much, so your mileage is likely to vary... + +interval_length=60 + + + +# AGGRESSIVE HOST CHECKING OPTION +# If you don't want to turn on aggressive host checking features, set +# this value to 0 (the default). Otherwise set this value to 1 to +# enable the aggressive check option. Read the docs for more info +# on what aggressive host check is or check out the source code in +# base/checks.c + +use_aggressive_host_checking=0 + + + +# SERVICE CHECK EXECUTION OPTION +# This determines whether or not Naemon will actively execute +# service checks when it initially starts. If this option is +# disabled, checks are not actively made, but Naemon can still +# receive and process passive check results that come in. Unless +# you're implementing redundant hosts or have a special need for +# disabling the execution of service checks, leave this enabled! +# Values: 1 = enable checks, 0 = disable checks + +execute_service_checks=1 + + + +# PASSIVE SERVICE CHECK ACCEPTANCE OPTION +# This determines whether or not Naemon will accept passive +# service checks results when it initially (re)starts. 
+# Values: 1 = accept passive checks, 0 = reject passive checks + +accept_passive_service_checks=1 + + + +# HOST CHECK EXECUTION OPTION +# This determines whether or not Naemon will actively execute +# host checks when it initially starts. If this option is +# disabled, checks are not actively made, but Naemon can still +# receive and process passive check results that come in. Unless +# you're implementing redundant hosts or have a special need for +# disabling the execution of host checks, leave this enabled! +# Values: 1 = enable checks, 0 = disable checks + +execute_host_checks=1 + + + +# PASSIVE HOST CHECK ACCEPTANCE OPTION +# This determines whether or not Naemon will accept passive +# host check results when it initially (re)starts. +# Values: 1 = accept passive checks, 0 = reject passive checks + +accept_passive_host_checks=1 + + + +# NOTIFICATIONS OPTION +# This determines whether or not Naemon will send out any host or +# service notifications when it is initially (re)started. +# Values: 1 = enable notifications, 0 = disable notifications + +enable_notifications=1 + + + +# EVENT HANDLER USE OPTION +# This determines whether or not Naemon will run any host or +# service event handlers when it is initially (re)started. Unless +# you're implementing redundant hosts, leave this option enabled. +# Values: 1 = enable event handlers, 0 = disable event handlers + +enable_event_handlers=1 + + + +# PROCESS PERFORMANCE DATA OPTION +# This determines whether or not Naemon will process performance +# data returned from service and host checks. If this option is +# enabled, host performance data will be processed using the +# host_perfdata_command (defined below) and service performance +# data will be processed using the service_perfdata_command (also +# defined below). Read the HTML docs for more information on +# performance data.
+# Values: 1 = process performance data, 0 = do not process performance data + +process_performance_data=0 + + + +# HOST AND SERVICE PERFORMANCE DATA PROCESSING COMMANDS +# These commands are run after every host and service check is +# performed. These commands are executed only if the +# process_performance_data option (above) is set to 1. The command +# argument is the short name of a command definition that you +# define in your host configuration file. Read the HTML docs for +# more information on performance data. + +#host_perfdata_command=process-host-perfdata +#service_perfdata_command=process-service-perfdata + + + +# HOST AND SERVICE PERFORMANCE DATA FILES +# These files are used to store host and service performance data. +# Performance data is only written to these files if the +# process_performance_data option (above) is set to 1. + +#host_perfdata_file=/var/lib/naemon/host-perfdata +#service_perfdata_file=/var/lib/naemon/service-perfdata + + + +# HOST AND SERVICE PERFORMANCE DATA FILE TEMPLATES +# These options determine what data is written (and how) to the +# performance data files. The templates may contain macros, special +# characters (\t for tab, \r for carriage return, \n for newline) +# and plain text. A newline is automatically added after each write +# to the performance data file. Some examples of what you can do are +# shown below. + +#host_perfdata_file_template=[HOSTPERFDATA]\t$TIMET$\t$HOSTNAME$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$ +#service_perfdata_file_template=[SERVICEPERFDATA]\t$TIMET$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$ + + + +# HOST AND SERVICE PERFORMANCE DATA FILE MODES +# This option determines whether or not the host and service +# performance data files are opened in write ("w") or append ("a") +# mode.
If you want to use named pipes, you should use the special +# pipe ("p") mode which avoids blocking at startup, otherwise you will +# likely want the default append ("a") mode. + +#host_perfdata_file_mode=a +#service_perfdata_file_mode=a + + + +# HOST AND SERVICE PERFORMANCE DATA FILE PROCESSING INTERVAL +# These options determine how often (in seconds) the host and service +# performance data files are processed using the commands defined +# below. A value of 0 indicates the files should not be periodically +# processed. + +#host_perfdata_file_processing_interval=0 +#service_perfdata_file_processing_interval=0 + + + +# HOST AND SERVICE PERFORMANCE DATA FILE PROCESSING COMMANDS +# These commands are used to periodically process the host and +# service performance data files. The interval at which the +# processing occurs is determined by the options above. + +#host_perfdata_file_processing_command=process-host-perfdata-file +#service_perfdata_file_processing_command=process-service-perfdata-file + + + +# HOST AND SERVICE PERFORMANCE DATA PROCESS EMPTY RESULTS +# These options determine whether the core will process empty perfdata +# results or not. This is needed for distributed monitoring, and intentionally +# turned on by default. +# If you don't require empty perfdata - saving some cpu cycles +# on unwanted macro calculation - you can turn that off. Be careful! +# Values: 1 = enable, 0 = disable + +#host_perfdata_process_empty_results=1 +#service_perfdata_process_empty_results=1 + + +# OBSESS OVER SERVICE CHECKS OPTION +# This determines whether or not Naemon will obsess over service +# checks and run the ocsp_command defined below. Unless you're +# planning on implementing distributed monitoring, do not enable +# this option. Read the HTML docs for more information on +# implementing distributed monitoring. +# Values: 1 = obsess over services, 0 = do not obsess (default) + +obsess_over_services=<%= @naemon_url.nil? ?
0 : 1 %> + + + +# OBSESSIVE COMPULSIVE SERVICE PROCESSOR COMMAND +# This is the command that is run for every service check that is +# processed by Naemon. This command is executed only if the +# obsess_over_services option (above) is set to 1. The command +# argument is the short name of a command definition that you +# define in your host configuration file. Read the HTML docs for +# more information on implementing distributed monitoring. + +<% if !@naemon_url.nil? %> +ocsp_command=notify-master +<% end %> + + + +# OBSESS OVER HOST CHECKS OPTION +# This determines whether or not Naemon will obsess over host +# checks and run the ochp_command defined below. Unless you're +# planning on implementing distributed monitoring, do not enable +# this option. Read the HTML docs for more information on +# implementing distributed monitoring. +# Values: 1 = obsess over hosts, 0 = do not obsess (default) + +obsess_over_hosts=0 + + + +# OBSESSIVE COMPULSIVE HOST PROCESSOR COMMAND +# This is the command that is run for every host check that is +# processed by Naemon. This command is executed only if the +# obsess_over_hosts option (above) is set to 1. The command +# argument is the short name of a command definition that you +# define in your host configuration file. Read the HTML docs for +# more information on implementing distributed monitoring. + +#ochp_command=somecommand + + + +# TRANSLATE PASSIVE HOST CHECKS OPTION +# This determines whether or not Naemon will translate +# DOWN/UNREACHABLE passive host check results into their proper +# state for this instance of Naemon. This option is useful +# if you have distributed or failover monitoring setup. In +# these cases your other Naemon servers probably have a different +# "view" of the network, with regards to the parent/child relationship +# of hosts. If a distributed monitoring server thinks a host +# is DOWN, it may actually be UNREACHABLE from the point of +# this Naemon instance. 
Enabling this option will tell Naemon +# to translate any DOWN or UNREACHABLE host states it receives +# passively into the correct state from the view of this server. +# Values: 1 = perform translation, 0 = do not translate (default) + +translate_passive_host_checks=0 + + + +# PASSIVE HOST CHECKS ARE SOFT OPTION +# This determines whether or not Naemon will treat passive host +# checks as being HARD or SOFT. By default, a passive host check +# result will put a host into a HARD state type. This can be changed +# by enabling this option. +# Values: 0 = passive checks are HARD, 1 = passive checks are SOFT + +passive_host_checks_are_soft=0 + + + +# ORPHANED HOST/SERVICE CHECK OPTIONS +# These options determine whether or not Naemon will periodically +# check for orphaned host service checks. Since service checks are +# not rescheduled until the results of their previous execution +# instance are processed, there exists a possibility that some +# checks may never get rescheduled. A similar situation exists for +# host checks, although the exact scheduling details differ a bit +# from service checks. Orphaned checks seem to be a rare +# problem and should not happen under normal circumstances. +# If you have problems with service checks never getting +# rescheduled, make sure you have orphaned service checks enabled. +# Values: 1 = enable checks, 0 = disable checks + +check_for_orphaned_services=1 +check_for_orphaned_hosts=1 + + + +# SERVICE FRESHNESS CHECK OPTION +# This option determines whether or not Naemon will periodically +# check the "freshness" of service results. Enabling this option +# is useful for ensuring passive checks are received in a timely +# manner. +# Values: 1 = enabled freshness checking, 0 = disable freshness checking + +check_service_freshness=1 + + + +# SERVICE FRESHNESS CHECK INTERVAL +# This setting determines how often (in seconds) Naemon will +# check the "freshness" of service check results. 
If you have +# disabled service freshness checking, this option has no effect. + +service_freshness_check_interval=60 + + + +# SERVICE CHECK TIMEOUT STATE +# This setting determines the state Naemon will report when a +# service check times out - that is does not respond within +# service_check_timeout seconds. This can be useful if a +# machine is running at too high a load and you do not want +# to consider a failed service check to be critical (the default). +# Valid settings are: +# c - Critical (default) +# u - Unknown +# w - Warning +# o - OK + +service_check_timeout_state=c + + + +# HOST FRESHNESS CHECK OPTION +# This option determines whether or not Naemon will periodically +# check the "freshness" of host results. Enabling this option +# is useful for ensuring passive checks are received in a timely +# manner. +# Values: 1 = enabled freshness checking, 0 = disable freshness checking + +check_host_freshness=0 + + + +# HOST FRESHNESS CHECK INTERVAL +# This setting determines how often (in seconds) Naemon will +# check the "freshness" of host check results. If you have +# disabled host freshness checking, this option has no effect. + +host_freshness_check_interval=60 + + + + +# ADDITIONAL FRESHNESS THRESHOLD LATENCY +# This setting determines the number of seconds that Naemon +# will add to any host and service freshness thresholds that +# it calculates (those not explicitly specified by the user). + +additional_freshness_latency=15 + + + + +# FLAP DETECTION OPTION +# This option determines whether or not Naemon will try +# and detect hosts and services that are "flapping". +# Flapping occurs when a host or service changes between +# states too frequently. When Naemon detects that a +# host or service is flapping, it will temporarily suppress +# notifications for that host/service until it stops +# flapping. Flap detection is very experimental, so read +# the HTML documentation before enabling this feature! 
+# Values: 1 = enable flap detection +# 0 = disable flap detection (default) + +enable_flap_detection=1 + + + +# FLAP DETECTION THRESHOLDS FOR HOSTS AND SERVICES +# Read the HTML documentation on flap detection for +# an explanation of what this option does. This option +# has no effect if flap detection is disabled. + +low_service_flap_threshold=5.0 +high_service_flap_threshold=20.0 +low_host_flap_threshold=5.0 +high_host_flap_threshold=20.0 + + + +# DATE FORMAT OPTION +# This option determines how short dates are displayed. Valid options +# include: +# us (MM-DD-YYYY HH:MM:SS) +# euro (DD-MM-YYYY HH:MM:SS) +# iso8601 (YYYY-MM-DD HH:MM:SS) +# strict-iso8601 (YYYY-MM-DDTHH:MM:SS) +# + +date_format=iso8601 + + + + +# TIMEZONE OFFSET +# This option is used to override the default timezone that this +# instance of Naemon runs in. If not specified, Naemon will use +# the system configured timezone. + +#use_timezone=US/Mountain +#use_timezone=Australia/Brisbane + + + +# ILLEGAL OBJECT NAME CHARACTERS +# This option allows you to specify illegal characters that cannot +# be used in host names, service descriptions, or names of other +# object types. + +illegal_object_name_chars=`~!$%^&*|'"<>?,()= + + + +# ILLEGAL MACRO OUTPUT CHARACTERS +# This option allows you to specify illegal characters that are +# stripped from macros before being used in notifications, event +# handlers, etc. This DOES NOT affect macros used in service or +# host check commands. +# The following macros are stripped of the characters you specify: +# $HOSTOUTPUT$ +# $HOSTPERFDATA$ +# $HOSTACKAUTHOR$ +# $HOSTACKCOMMENT$ +# $SERVICEOUTPUT$ +# $SERVICEPERFDATA$ +# $SERVICEACKAUTHOR$ +# $SERVICEACKCOMMENT$ + +illegal_macro_output_chars=`~$&|'"<> + + + +# REGULAR EXPRESSION MATCHING +# This option controls whether or not regular expression matching +# takes place in the object config files. 
Regular expression +# matching is used to match host, hostgroup, service, and service +# group names/descriptions in some fields of various object types. +# Values: 1 = enable regexp matching, 0 = disable regexp matching + +use_regexp_matching=0 + + + +# "TRUE" REGULAR EXPRESSION MATCHING +# This option controls whether or not "true" regular expression +# matching takes place in the object config files. This option +# only has an effect if regular expression matching is enabled +# (see above). If this option is DISABLED, regular expression +# matching only occurs if a string contains wildcard characters +# (* and ?). If the option is ENABLED, regexp matching occurs +# all the time (which can be annoying). +# Values: 1 = enable true matching, 0 = disable true matching + +use_true_regexp_matching=0 + + + +# ADMINISTRATOR EMAIL/PAGER ADDRESSES +# The email and pager address of a global administrator (likely you). +# Naemon never uses these values itself, but you can access them by +# using the $ADMINEMAIL$ and $ADMINPAGER$ macros in your notification +# commands. + +admin_email=naemon@localhost +admin_pager=pagenaemon@localhost + + + +# DEBUG LEVEL +# This option determines how much (if any) debugging information will +# be written to the debug file. OR values together to log multiple +# types of information. +# Values: +# -1 = Everything +# 0 = Nothing +# 1 = Functions +# 2 = Configuration +# 4 = Process information +# 8 = Scheduled events +# 16 = Host/service checks +# 32 = Notifications +# 64 = Event broker +# 128 = External commands +# 256 = Commands +# 512 = Scheduled downtime +# 1024 = Comments +# 2048 = Macros + +debug_level=0 + + + +# DEBUG VERBOSITY +# This option determines how verbose the debug log out will be. +# Values: 0 = Brief output +# 1 = More detailed +# 2 = Very detailed + +debug_verbosity=1 + + + +# DEBUG FILE +# This option determines where Naemon should write debugging information. 
+ +debug_file=/var/lib/naemon/naemon.debug + + + +# MAX DEBUG FILE SIZE +# This option determines the maximum size (in bytes) of the debug file. If +# the file grows larger than this size, it will be renamed with a .old +# extension. If a file already exists with a .old extension it will +# automatically be deleted. This helps ensure your disk space usage doesn't +# get out of control when debugging Naemon. + +max_debug_file_size=1000000 + + + +# Should we allow hostgroups to have no hosts, we default this to off since +# that was the old behavior + +allow_empty_hostgroup_assignment=0 + + + +# Normally worker count is dynamically allocated based on 1.5 * number of cpu's +# with a minimum of 4 workers. This value will override the defaults + +#check_workers=3 + +# CIRCULAR DEPENDENCIES (EXPERIMENTAL) +# Allow for circular dependencies in naemon's host graph. +# Enabling this will cause propagation of the following to stop working: +# * scheduling downtime +# * enabling notification +# * disabling notification +# This feature is experimental and bugs might occur. + +allow_circular_dependencies=0 diff --git a/modules/profile/templates/monitoring/resource.cfg.erb b/modules/profile/templates/monitoring/resource.cfg.erb new file mode 100644 index 0000000..7da5e66 --- /dev/null +++ b/modules/profile/templates/monitoring/resource.cfg.erb @@ -0,0 +1,30 @@ +########################################################################### +# +# RESOURCE.CFG - Sample Resource File for Naemon 1.0.7 +# +# +# You can define $USERx$ macros in this file, which can in turn be used +# in command definitions in your host config file(s). $USERx$ macros are +# useful for storing sensitive information such as usernames, passwords, +# etc.
They are also handy for specifying the path to plugins and +# event handlers - if you decide to move the plugins or event handlers to +# a different directory in the future, you can just update one or two +# $USERx$ macros, instead of modifying a lot of command definitions. +# +# Naemon supports up to 256 $USERx$ macros ($USER1$ through $USER256$) +# +# Resource files may also be used to store configuration directives for +# external data sources like MySQL... +# +########################################################################### + +# Sets $USER1$ to be the path to the plugins +$USER1$=/usr/lib/monitoring-plugins +$USER2$=/etc/naemon/monitoring-plugins + +# Sets $USER2$ to be the path to event handlers +#$USER2$=/usr/lib/monitoring-plugins/eventhandlers + +# Store some usernames and passwords (hidden from the CGIs) +#$USER3$=someuser +#$USER4$=somepassword diff --git a/modules/profile/templates/monitoring/send_nrdp.sh.erb b/modules/profile/templates/monitoring/send_nrdp.sh.erb new file mode 100755 index 0000000..41f58e5 --- /dev/null +++ b/modules/profile/templates/monitoring/send_nrdp.sh.erb @@ -0,0 +1,271 @@ +#!/bin/bash +# +# check_nrdp.sh +# +# Copyright (c) 2010-2017 - Nagios Enterprises, LLC. +# Written by: Scott Wilkerson (nagios@nagios.org) +# +# 2017-09-25 Troy Lea aka BOX293 +# - Fixed script not working with arguments when run as a cron job +# or if being used as a nagios command like obsessive compulsive. +# ... "if [ ! -t 0 ]" was the reason why. 
+# 2017-12-08 Jørgen van der Meulen (Conclusion Xforce) +# - Fixed typo in NRDP abbreviation + + +PROGNAME=$(basename $0) +RELEASE="Revision 0.6.1" + +print_release() { + echo "$RELEASE" +} + +print_usage() { + echo "" + echo "$PROGNAME $RELEASE - Send NRDP script for Nagios" + echo "" + echo "Usage: send_nrdp.sh -u URL -t token [options]" + echo "" + echo "Usage: $PROGNAME -h display help" + echo "" +} + +print_help() { + print_usage + echo "" + echo "This script is used to send NRDP data to a Nagios server" + echo "" + echo "Required:" + echo " -u"," URL of NRDP server. Usually http:///nrdp/" + echo " -t"," Shared token. Must be the same token set in NRDP Server" + echo "" + echo "Options:" + echo " Single Check:" + echo " -H host name" + echo " -s service name" + echo " -S State" + echo " -o output" + echo "" + echo " STDIN:" + echo " [-d delimiter] (default -d \"\\t\")" + echo " With only the required parameters $PROGNAME is capable of" + echo " processing data piped to it either from a file or other" + echo " process. By default, we use \t as the delimiter however this" + echo " may be specified with the -d option data should be in the" + echo " following formats one entry per line." 
+ echo " For Host checks:" + echo " hostname State output" + echo " For Service checks" + echo " hostname servicename State output" + echo "" + echo " File:" + echo " -f /full/path/to/file" + echo " This file will be sent to the NRDP server specified in -u" + echo " The file should be an XML file in the following format" + echo " ##################################################" + echo "" + echo " " + echo " " + echo " " + echo " YOUR_HOSTNAME" + echo " 0" + echo " OK|perfdata=1.00;5;10;0" + echo " " + echo " " + echo " YOUR_HOSTNAME" + echo " YOUR_SERVICENAME" + echo " 0" + echo " OK|perfdata=1.00;5;10;0" + echo " " + echo " " + echo " ##################################################" + echo "" + echo " Directory:" + echo " -D /path/to/temp/dir" + echo " This is a directory that contains XML files in the format" + echo " above. Additionally, if the -d flag is specified, $PROGNAME" + echo " will create temp files here if the server could not be reached." + echo " On additional calls with the same -D path, if a connection to" + echo " the server is successful, all temp files will be sent." + exit 0 +} + +send_data() { + pdata="token=$token&cmd=submitcheck" + if [ $file ]; then + fdata="--data-urlencode XMLDATA@$file" + rslt=`curl -f --silent --insecure -d "$pdata" $fdata "$url/"` + else + pdata="$pdata&XMLDATA=$1" + rslt=`curl -f --silent --insecure -d "$pdata" "$url/"` + fi + + ret=$? + + status=`echo $rslt | sed -n 's|.*\(.*\).*|\1|p'` + message=`echo $rslt | sed -n 's|.*\(.*\).*|\1|p'` + if [ $ret != 0 ];then + echo "ERROR: could not connect to NRDP server at $url" + # verify we are not processing the directory already and then write to the directory + if [ ! "$2" ] && [ $directory ];then + if [ ! 
-d "$directory" ];then + mkdir -p "$directory" + fi + # This is where we write to the tmp directory + echo $xml > `mktemp $directory/nrdp.XXXXXX` + fi + exit 1 + fi + + if [ "$status" != "0" ];then + # This means we couldn't connect to NRPD server + echo "ERROR: The NRDP Server said $message" + # verify we are not processing the directory already and then write to the directory + if [ ! "$2" ] && [ $directory ];then + if [ ! -d "$directory" ];then + mkdir -p "$directory" + fi + # This is where we write to the tmp directory + echo $xml > `mktemp $directory/nrdp.XXXXXX` + fi + + exit 2 + fi + + # If this was a directory call and was successful, remove the file + if [ $2 ] && [ "$status" == "0" ];then + rm -f "$2" + fi + + # If we weren't successful error + if [ $ret != 0 ];then + echo "exited with error "$ret + exit $ret + fi +} + +# Parse parameters +url="<%= @naemon_url %>" +token="<%= @naemon_token %>" + +while getopts "u:t:H:s:S:o:f:d:c:D:hv" option +do + case $option in + u) url=$OPTARG ;; + t) token=$OPTARG ;; + H) host=$OPTARG ;; + s) service=$OPTARG ;; + S) State=$OPTARG ;; + o) output=$OPTARG ;; + f) file=$OPTARG ;; + d) delim=$OPTARG ;; + c) checktype=$OPTARG ;; + D) directory=$OPTARG ;; + h) print_help 0;; + v) print_release + exit 0 ;; + esac +done + +if [ ! $checktype ]; then + checktype=1 +fi +if [ ! $delim ]; then + delim=`echo -e "\t"` +fi + +if [ "x$url" == "x" -o "x$token" == "x" ] +then + echo "Usage: send_nrdp -u url -t token" + exit 1 +fi +# detecting curl +if [[ `which curl` =~ "/curl" ]] + then curl=1; +fi + +if [[ ! 
$curl ]]; +then + echo "Either curl or wget are required to run $PROGNAME" + exit 1 +fi + +checkcount=0 + +if [ $host ]; then + xml="" + # we are not getting piped results + if [ "$host" == "" ] || [ "$State" == "" ]; then + echo "You must provide a host -H and State -S" + exit 2 + fi + if [ "$service" != "" ]; then + xml="$xml$service" + else + xml="$xml" + fi + + # urlencode XML special chars + output=${output//&/%26} + output=${output///%3E} + + xml="$xml$host$State" + checkcount=1 +fi + + # If only url and token have been provided then it is assumed that data is being piped +######################## +if [[ ! $host && ! $State && ! $file && ! $directory ]]; then + xml="" + # we know we are being piped results + IFS=$delim + + while read -r line ; do + arr=($line) + if [ ${#arr[@]} != 0 ];then + if [[ ${#arr[@]} < 3 ]] || [[ ${#arr[@]} > 4 ]];then + echo "ERROR: STDIN must be either 3 or 4 fields long, I found "${#arr[@]} + else + if [ ${#arr[@]} == 4 ]; then + xml="$xml + ${arr[1]} + ${arr[0]} + ${arr[2]} + ${arr[3]}" + else + xml="$xml + ${arr[0]} + ${arr[1]} + ${arr[2]}" + fi + + xml="$xml" + checkcount=$[checkcount+1] + fi + fi + done + IFS=" " +fi + +if [ $file ]; then + xml=`cat $file` + send_data "$xml" +fi + +if [ $directory ]; then + #echo "Processing directory..." + for f in `ls $directory` + do + #echo "Processing $f file..." + # take action on each file. 
$f store current file name + xml=`cat $directory/$f` + send_data "$xml" "$directory/$f" + done +fi + +if [ "x$file" == "x" ] && [ "x$directory" == "x" ]; then + xml="$xml" + send_data "$xml" + echo "Sent $checkcount checks to $url" +fi diff --git a/modules/role/manifests/caldance.pp b/modules/role/manifests/caldance.pp index 75d9dbd..b7948e6 100644 --- a/modules/role/manifests/caldance.pp +++ b/modules/role/manifests/caldance.pp @@ -6,6 +6,7 @@ class role::caldance ( include "profile::postgresql" include "profile::apache" include "profile::redis" + include "profile::monitoring" ensure_packages(["python-pip", "python-virtualenv", "python-django"]) } -- cgit v1.2.3 From 6962463657db999c33f1aabe60e0567be218918d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Bouya?= Date: Sun, 8 Jul 2018 15:35:43 +0200 Subject: Prepare master monitoring --- modules/profile/manifests/monitoring/commands.pp | 1 + modules/profile/manifests/monitoring/contacts.pp | 13 +-- modules/profile/manifests/monitoring/hosts.pp | 26 +++--- .../profile/manifests/monitoring/local_service.pp | 65 +++++++++++++ modules/profile/manifests/monitoring/services.pp | 102 +++++++-------------- modules/profile/manifests/monitoring/times.pp | 13 +-- 6 files changed, 127 insertions(+), 93 deletions(-) create mode 100644 modules/profile/manifests/monitoring/local_service.pp (limited to 'modules') diff --git a/modules/profile/manifests/monitoring/commands.pp b/modules/profile/manifests/monitoring/commands.pp index 7e9683f..a25f7b3 100644 --- a/modules/profile/manifests/monitoring/commands.pp +++ b/modules/profile/manifests/monitoring/commands.pp @@ -31,6 +31,7 @@ class profile::monitoring::commands inherits profile::monitoring { target => "/etc/naemon/objects.cfg", notify => Service["naemon"], before => Service["naemon"], + require => File["/etc/naemon"], } nagios_command { diff --git a/modules/profile/manifests/monitoring/contacts.pp b/modules/profile/manifests/monitoring/contacts.pp index 284d51c..107af18 
100644 --- a/modules/profile/manifests/monitoring/contacts.pp +++ b/modules/profile/manifests/monitoring/contacts.pp @@ -1,11 +1,12 @@ class profile::monitoring::contacts { Nagios_contactgroup { - ensure => "present", - owner => "naemon", - group => "naemon", - target => "/etc/naemon/objects.cfg", - notify => Service["naemon"], - before => Service["naemon"], + ensure => "present", + owner => "naemon", + group => "naemon", + target => "/etc/naemon/objects.cfg", + notify => Service["naemon"], + before => Service["naemon"], + require => File["/etc/naemon"], } nagios_contactgroup { "admins": diff --git a/modules/profile/manifests/monitoring/hosts.pp b/modules/profile/manifests/monitoring/hosts.pp index 306fe3e..3c8bc73 100644 --- a/modules/profile/manifests/monitoring/hosts.pp +++ b/modules/profile/manifests/monitoring/hosts.pp @@ -2,21 +2,23 @@ class profile::monitoring::hosts { $real_hostname = lookup("base_installation::real_hostname") Nagios_hostgroup { - ensure => "present", - owner => "naemon", - group => "naemon", - target => "/etc/naemon/objects.cfg", - notify => Service["naemon"], - before => Service["naemon"], + ensure => "present", + owner => "naemon", + group => "naemon", + target => "/etc/naemon/objects.cfg", + notify => Service["naemon"], + before => Service["naemon"], + require => File["/etc/naemon"], } Nagios_host { - ensure => "present", - owner => "naemon", - group => "naemon", - target => "/etc/naemon/objects.cfg", - notify => Service["naemon"], - before => Service["naemon"], + ensure => "present", + owner => "naemon", + group => "naemon", + target => "/etc/naemon/objects.cfg", + notify => Service["naemon"], + before => Service["naemon"], + require => File["/etc/naemon"], } nagios_hostgroup { "linux-servers": diff --git a/modules/profile/manifests/monitoring/local_service.pp b/modules/profile/manifests/monitoring/local_service.pp new file mode 100644 index 0000000..3b39d1f --- /dev/null +++ b/modules/profile/manifests/monitoring/local_service.pp @@ 
-0,0 +1,65 @@ +define profile::monitoring::local_service ( + Optional[Hash] $common = {}, + Optional[Hash] $master = {}, + Optional[Hash] $local = {}, +) { + $service_description = $title + $real_hostname = lookup("base_installation::real_hostname") + + $service_generic = { + active_checks_enabled => "1", + check_freshness => "0", + check_interval => "10", + check_period => "24x7", + contact_groups => "admins", + event_handler_enabled => "1", + flap_detection_enabled => "1", + is_volatile => "0", + max_check_attempts => "3", + notification_interval => "60", + notification_options => "w,u,c,r", + notification_period => "24x7", + notifications_enabled => "0", + obsess_over_service => "1", + passive_checks_enabled => "1", + process_perf_data => "1", + retain_nonstatus_information => "1", + retain_status_information => "1", + retry_interval => "2", + } + + $service_local = merge($service_generic, { + host_name => $real_hostname, + check_interval => "5", + max_check_attempts => "4", + retry_interval => "1", + }) + + + [true, false].each |$services_for_master| { + if $services_for_master { + $default_local = { + mode => "0644", + target => "/etc/naemon/services_for_master.cfg", + check_interval => $service_local["check_interval"], + retry_interval => $service_local["retry_interval"], + host_name => $service_local["host_name"], + use => "external-service", + notify => [], + } + $t = "master - " + $services_key = $master + } else { + $default_local = merge($service_local, { target => "/etc/naemon/objects.cfg" }) + $t = "" + $services_key = $local + } + + $hash = merge($default_local, $common, $services_key) + + nagios_service { "$t$service_description": + service_description => $service_description, + * => $hash + } + } +} diff --git a/modules/profile/manifests/monitoring/services.pp b/modules/profile/manifests/monitoring/services.pp index 39c2def..6e59ab1 100644 --- a/modules/profile/manifests/monitoring/services.pp +++ b/modules/profile/manifests/monitoring/services.pp 
@@ -1,45 +1,14 @@ class profile::monitoring::services { - $real_hostname = lookup("base_installation::real_hostname") Nagios_service { - ensure => "present", - owner => "naemon", - group => "naemon", - target => "/etc/naemon/objects.cfg", - notify => Service["naemon"], - before => Service["naemon"], + ensure => "present", + owner => "naemon", + group => "naemon", + notify => Service["naemon"], + before => Service["naemon"], + require => File["/etc/naemon"], } - $service_generic = { - active_checks_enabled => "1", - check_freshness => "0", - check_interval => "10", - check_period => "24x7", - contact_groups => "admins", - event_handler_enabled => "1", - flap_detection_enabled => "1", - is_volatile => "0", - max_check_attempts => "3", - notification_interval => "60", - notification_options => "w,u,c,r", - notification_period => "24x7", - notifications_enabled => "1", - obsess_over_service => "1", - passive_checks_enabled => "1", - process_perf_data => "1", - retain_nonstatus_information => "1", - retain_status_information => "1", - retry_interval => "2", - } - - - $service_local = merge($service_generic, { - host_name => $real_hostname, - check_interval => "5", - max_check_attempts => "4", - retry_interval => "1", - }) - sudo::conf { default: sudo_file_name => "naemon"; @@ -47,43 +16,38 @@ class profile::monitoring::services { content => "naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping"; } - nagios_service { - default: * => $service_local; + profile::monitoring::local_service { "Size on root partition": - service_description => "Size on root partition", - check_command => "check_local_disk!20%!10%!/"; + local => { + check_command => "check_local_disk!20%!10%!/", + }; "Total number of process": - service_description => "Total number of process", - check_command => "check_local_procs!250!400!RSZDT"; + local => { + check_command => "check_local_procs!50!100!RSZDT", + }; "Average load": - service_description => "Average load", - check_command => 
"check_local_load!8.0,8.0,8.0!10.0,10.0,10.0"; + local => { + check_command => "check_local_load!8.0,8.0,8.0!10.0,10.0,10.0", + }; "Swap usage": - service_description => "Swap usage", - check_command => "check_local_swap!20!10"; + local => { + check_command => "check_local_swap!20!10", + }; "fail2ban is active": - service_description => "fail2ban is active", - check_command => "check_command_output!fail2ban-client ping!pong!-r root", - require => Sudo::Conf["naemon-fail2ban"]; + local => { + check_command => "check_command_output!fail2ban-client ping!pong!-r root", + require => Sudo::Conf["naemon-fail2ban"], + }; "NTP is activated and working": - service_description => "NTP is activated and working", - check_command => "check_ntp"; - } - - if empty(find_file("/proc/mdstat")) { - nagios_service { - default: * => $service_local; - "No mdadm array is degraded": - ensure => "absent", - service_description => "No mdadm array is degraded", - check_command => "check_md_raid"; - } - } else { - nagios_service { - default: * => $service_local; - "No mdadm array is degraded": - service_description => "No mdadm array is degraded", - check_command => "check_md_raid"; - } + local => { + check_command => "check_ntp", + }; + "No mdadm array is degraded": + common => { + ensure => (find_file("/proc/mdstat") == undef) ? 
{ true => "absent", default =>"present" }, + }, + local => { + check_command => "check_md_raid", + }; } } diff --git a/modules/profile/manifests/monitoring/times.pp b/modules/profile/manifests/monitoring/times.pp index fb61acc..25bf86b 100644 --- a/modules/profile/manifests/monitoring/times.pp +++ b/modules/profile/manifests/monitoring/times.pp @@ -1,11 +1,12 @@ class profile::monitoring::times { Nagios_timeperiod { - ensure => "present", - owner => "naemon", - group => "naemon", - target => "/etc/naemon/objects.cfg", - notify => Service["naemon"], - before => Service["naemon"], + ensure => "present", + owner => "naemon", + group => "naemon", + target => "/etc/naemon/objects.cfg", + notify => Service["naemon"], + before => Service["naemon"], + require => File["/etc/naemon"], } nagios_timeperiod { "24x7": -- cgit v1.2.3 From b5305b5cad5cbb0a2c072b29f2d4dc05126c39d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Bouya?= Date: Sun, 8 Jul 2018 21:51:30 +0200 Subject: Add postgresql monitoring --- .../profile/files/monitoring/check_last_file_date | 31 +++++++++++++++++ .../files/monitoring/check_postgres_replication | 35 +++++++++++++++++++ modules/profile/manifests/monitoring/commands.pp | 40 +++++++++++++--------- .../profile/manifests/monitoring/local_service.pp | 16 +++++++++ modules/profile/manifests/monitoring/services.pp | 24 ++++--------- modules/profile/manifests/postgresql.pp | 8 +++++ .../profile/manifests/postgresql/backup_dump.pp | 9 +++++ .../manifests/postgresql/backup_pgbouncer.pp | 10 ++++++ modules/profile/manifests/postgresql/master.pp | 10 ++++++ modules/profile/manifests/postgresql/ssl.pp | 1 + 10 files changed, 149 insertions(+), 35 deletions(-) create mode 100644 modules/profile/files/monitoring/check_last_file_date create mode 100644 modules/profile/files/monitoring/check_postgres_replication (limited to 'modules') diff --git a/modules/profile/files/monitoring/check_last_file_date b/modules/profile/files/monitoring/check_last_file_date 
new file mode 100644 index 0000000..8eabb57 --- /dev/null +++ b/modules/profile/files/monitoring/check_last_file_date @@ -0,0 +1,31 @@ +#!/bin/bash + +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 + +base_path=$1 +hours=$2 +as_user=$3 + +if [ -z "$as_user" ]; then + last_date=$(find $base_path -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort | tail -n 1) +else + last_date=$(sudo -u "$as_user" find $base_path -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort | tail -n 1) +fi + +if [ -z "$last_date" ]; then + echo "UNKNOWN: Could not read folder" + exit $STATE_UNKNOWN +else + LC_ALL=C last_date=$(printf "%.*f" 0 $last_date) + min_date=$(date -d "$hours hours ago" "+%s") + if [ "$min_date" -lt "$last_date" ]; then + echo "OK: Last backup $(date -d @$last_date)" + exit $STATE_OK + else + echo "CRITICAL: Last backup $(date -d @$last_date)" + exit $STATE_CRITICAL + fi +fi diff --git a/modules/profile/files/monitoring/check_postgres_replication b/modules/profile/files/monitoring/check_postgres_replication new file mode 100644 index 0000000..163c68a --- /dev/null +++ b/modules/profile/files/monitoring/check_postgres_replication @@ -0,0 +1,35 @@ +#!/bin/bash + +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 + +user=$1 +host=$2 +port=$3 + +lag=$(psql -h $host -p $port -A -t -c "SELECT COALESCE(EXTRACT(EPOCH FROM replay_lag),0) FROM pg_stat_replication WHERE usename='$user'" 2>/dev/null) +exit_code=$? 
+ +if [[ $exit_code -ne 0 ]]; then + echo "UNKNOWN:Impossible to run psql command" + exit $STATE_UNKNOWN +elif [[ -z "$lag" ]]; then + echo "UNKNOWN:No replication found for $user" + exit $STATE_UNKNOWN +else + output="Replication lag for $user is ${lag}s" + LC_ALL=C lag=$(printf "%.*f" 0 $lag) + + if [[ $lag -lt 5 ]]; then + echo "OK:$output" + exit $STATE_OK + elif [[ $lag -lt 10 ]]; then + echo "WARNING:$output" + exit $STATE_WARNING + else + echo "CRITICAL:$output" + exit $STATE_CRITICAL + fi +fi diff --git a/modules/profile/manifests/monitoring/commands.pp b/modules/profile/manifests/monitoring/commands.pp index a25f7b3..4ccc816 100644 --- a/modules/profile/manifests/monitoring/commands.pp +++ b/modules/profile/manifests/monitoring/commands.pp @@ -3,25 +3,24 @@ class profile::monitoring::commands inherits profile::monitoring { file { "/etc/naemon/monitoring-plugins": ensure => "directory", - owner => "naemon", - group => "naemon", - mode => "0700", - } - - file { "/etc/naemon/monitoring-plugins/check_command": - ensure => "present", - owner => "naemon", + owner => "root", group => "naemon", - mode => "0700", - source => "puppet:///modules/profile/monitoring/check_command", + mode => "0755", } - file { "/etc/naemon/monitoring-plugins/check_md_raid": - ensure => "present", - owner => "naemon", - group => "naemon", - mode => "0700", - source => "puppet:///modules/profile/monitoring/check_md_raid", + [ + "check_command", + "check_md_raid", + "check_postgres_replication", + "check_last_file_date", + ].each |$file| { + file { "/etc/naemon/monitoring-plugins/$file": + ensure => "present", + owner => "root", + group => "naemon", + mode => "0755", + source => "puppet:///modules/profile/monitoring/$file", + } } Nagios_command { @@ -48,10 +47,17 @@ class profile::monitoring::commands inherits profile::monitoring { "check_ntp": command_line => '$USER1$/check_ntp_time -H 0.arch.pool.ntp.org'; "check_md_raid": - command_line => '$USER2$/check_md_raid'; + command_line => 
'$USER2$/check_md_raid', + require => File["/etc/naemon/monitoring-plugins/check_md_raid"]; "check_command_output": command_line => '$USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$', require => File["/etc/naemon/monitoring-plugins/check_command"]; + "check_postgresql_replication": + command_line => '/usr/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$"', + require => File["/etc/naemon/monitoring-plugins/check_postgres_replication"]; + "check_last_file_date": + command_line => '$USER2$/check_last_file_date "$ARG1$" "$ARG2$" "$ARG3$"', + require => File["/etc/naemon/monitoring-plugins/check_last_file_date"], } unless empty($naemon_url) { diff --git a/modules/profile/manifests/monitoring/local_service.pp b/modules/profile/manifests/monitoring/local_service.pp index 3b39d1f..0caf72e 100644 --- a/modules/profile/manifests/monitoring/local_service.pp +++ b/modules/profile/manifests/monitoring/local_service.pp @@ -1,4 +1,5 @@ define profile::monitoring::local_service ( + Optional[Hash] $sudos = {}, Optional[Hash] $common = {}, Optional[Hash] $master = {}, Optional[Hash] $local = {}, @@ -6,6 +7,15 @@ define profile::monitoring::local_service ( $service_description = $title $real_hostname = lookup("base_installation::real_hostname") + Nagios_service { + ensure => "present", + owner => "naemon", + group => "naemon", + notify => Service["naemon"], + before => Service["naemon"], + require => File["/etc/naemon"], + } + $service_generic = { active_checks_enabled => "1", check_freshness => "0", @@ -35,6 +45,12 @@ define profile::monitoring::local_service ( retry_interval => "1", }) + $sudos.each |$sudo_name, $content| { + sudo::conf { $sudo_name: + content => $content, + before => Nagios_service[$service_description], + } + } [true, false].each |$services_for_master| { if $services_for_master { diff --git a/modules/profile/manifests/monitoring/services.pp b/modules/profile/manifests/monitoring/services.pp index 6e59ab1..b20a352 100644 --- 
a/modules/profile/manifests/monitoring/services.pp +++ b/modules/profile/manifests/monitoring/services.pp @@ -1,21 +1,5 @@ class profile::monitoring::services { - Nagios_service { - ensure => "present", - owner => "naemon", - group => "naemon", - notify => Service["naemon"], - before => Service["naemon"], - require => File["/etc/naemon"], - } - - sudo::conf { - default: - sudo_file_name => "naemon"; - 'naemon-fail2ban': - content => "naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping"; - } - profile::monitoring::local_service { "Size on root partition": local => { @@ -34,9 +18,11 @@ class profile::monitoring::services { check_command => "check_local_swap!20!10", }; "fail2ban is active": - local => { + sudos => { + "naemon-fail2ban" => "naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping", + }, + local => { check_command => "check_command_output!fail2ban-client ping!pong!-r root", - require => Sudo::Conf["naemon-fail2ban"], }; "NTP is activated and working": local => { @@ -50,4 +36,6 @@ class profile::monitoring::services { check_command => "check_md_raid", }; } + + Profile::Monitoring::Local_service <| |> } diff --git a/modules/profile/manifests/postgresql.pp b/modules/profile/manifests/postgresql.pp index 97ce572..edd6ea6 100644 --- a/modules/profile/manifests/postgresql.pp +++ b/modules/profile/manifests/postgresql.pp @@ -28,5 +28,13 @@ class profile::postgresql ( profile::postgresql::base_pg_hba_rules { "default": } + @profile::monitoring::local_service { "Databases are present in postgresql": + sudos => { + "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace" + }, + local => { + check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace'!public!-r postgres", + } + } } diff --git a/modules/profile/manifests/postgresql/backup_dump.pp b/modules/profile/manifests/postgresql/backup_dump.pp index 53fb20e..5e469c5 100644 --- 
a/modules/profile/manifests/postgresql/backup_dump.pp +++ b/modules/profile/manifests/postgresql/backup_dump.pp @@ -57,4 +57,13 @@ define profile::postgresql::backup_dump ( }, ] } + + @profile::monitoring::local_service { "Last postgresql dump in $pg_backup_path is not too old": + sudos => { + "naemon-postgresql-dumps" => "naemon ALL=($pg_user) NOPASSWD: /usr/bin/find $pg_backup_path -mindepth 1 -maxdepth 1 -printf %T@\\n", + } + local => { + check_command => "check_last_file_date!$pg_backup_path!10!$pg_user" + } + } } diff --git a/modules/profile/manifests/postgresql/backup_pgbouncer.pp b/modules/profile/manifests/postgresql/backup_pgbouncer.pp index 45b8ed5..c82eefd 100644 --- a/modules/profile/manifests/postgresql/backup_pgbouncer.pp +++ b/modules/profile/manifests/postgresql/backup_pgbouncer.pp @@ -48,6 +48,16 @@ define profile::postgresql::backup_pgbouncer ( content => "${pg_infos[pgbouncer_dbname]} = host=$pg_path$pg_port user=${pg_infos[dbuser]} dbname=${pg_infos[dbname]}", } + # FIXME: current pam configuration requires password for postgres + # @profile::monitoring::local_service { "Database ${pg_infos[pgbouncer_dbname]} is available in pgbouncer": + # sudos => { + # "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace ${pg_infos[pgbouncer_dbname]}" + # }, + # local => { + # check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace' ${pg_infos[pgbouncer_dbname]}!public!-r postgres", + # } + # } + # pg_hba for accessed cluster postgresql::server::pg_hba_rule { "$pg_backup_host - local access as ${pg_infos[dbuser]} user": description => "Allow local access to ${pg_infos[dbuser]} user", diff --git a/modules/profile/manifests/postgresql/master.pp b/modules/profile/manifests/postgresql/master.pp index 02315a6..e775eb4 100644 --- a/modules/profile/manifests/postgresql/master.pp +++ b/modules/profile/manifests/postgresql/master.pp @@ -59,5 +59,15 
@@ define profile::postgresql::master ( handle_slot => true, add_self_role => true, } + + @profile::monitoring::local_service { "Postgresql replication for $backup_host is up to date": + sudos => { + "naemon-postgresql-replication-$backup_host" => "naemon ALL=(postgres) NOPASSWD: /etc/naemon/monitoring-plugins/check_postgres_replication $backup_host /run/postgresql 5432" + + }, + local => { + check_command => "check_postgresql_replication!$backup_host!/run/postgresql/!5432", + } + } } } diff --git a/modules/profile/manifests/postgresql/ssl.pp b/modules/profile/manifests/postgresql/ssl.pp index dc56c0b..9b0a95c 100644 --- a/modules/profile/manifests/postgresql/ssl.pp +++ b/modules/profile/manifests/postgresql/ssl.pp @@ -79,4 +79,5 @@ define profile::postgresql::ssl ( } } + # FIXME: add monitoring for ssl } -- cgit v1.2.3 From 2742958fd69c91c442685be62140f1e29e363b95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Bouya?= Date: Tue, 10 Jul 2018 12:31:32 +0200 Subject: Move todos to readme.md and add first documentation --- modules/profile/manifests/postgresql/backup_pgbouncer.pp | 2 +- modules/profile/manifests/postgresql/ssl.pp | 2 -- modules/role/manifests/cryptoportfolio.pp | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) (limited to 'modules') diff --git a/modules/profile/manifests/postgresql/backup_pgbouncer.pp b/modules/profile/manifests/postgresql/backup_pgbouncer.pp index c82eefd..5fd7861 100644 --- a/modules/profile/manifests/postgresql/backup_pgbouncer.pp +++ b/modules/profile/manifests/postgresql/backup_pgbouncer.pp @@ -48,7 +48,7 @@ define profile::postgresql::backup_pgbouncer ( content => "${pg_infos[pgbouncer_dbname]} = host=$pg_path$pg_port user=${pg_infos[dbuser]} dbname=${pg_infos[dbname]}", } - # FIXME: current pam configuration requires password for postgres + # Current pam configuration requires password for postgres # @profile::monitoring::local_service { "Database ${pg_infos[pgbouncer_dbname]} is available in pgbouncer": # sudos => { 
# "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace ${pg_infos[pgbouncer_dbname]}" diff --git a/modules/profile/manifests/postgresql/ssl.pp b/modules/profile/manifests/postgresql/ssl.pp index 9b0a95c..b809a9d 100644 --- a/modules/profile/manifests/postgresql/ssl.pp +++ b/modules/profile/manifests/postgresql/ssl.pp @@ -78,6 +78,4 @@ define profile::postgresql::ssl ( content => "ssl = on\nssl_key_file = '$ssl_key'\nssl_cert_file = '$ssl_cert'\n" } } - - # FIXME: add monitoring for ssl } diff --git a/modules/role/manifests/cryptoportfolio.pp b/modules/role/manifests/cryptoportfolio.pp index c675e91..8f7bfca 100644 --- a/modules/role/manifests/cryptoportfolio.pp +++ b/modules/role/manifests/cryptoportfolio.pp @@ -41,7 +41,6 @@ class role::cryptoportfolio ( contain "role::cryptoportfolio::bot" } - # FIXME: restore backup unless empty($front_version) { contain "role::cryptoportfolio::front" } -- cgit v1.2.3 From a0df248a2be61557b8a67c3d6e4df24dc3e7843e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Bouya?= Date: Tue, 10 Jul 2018 12:36:52 +0200 Subject: Add monitoring for etherpad --- modules/base_installation/manifests/package_managers.pp | 11 +++++++++-- modules/profile/files/monitoring/check_postgres_replication | 10 +++++----- modules/profile/manifests/monitoring.pp | 7 +++++++ modules/profile/manifests/postgresql.pp | 2 +- modules/profile/manifests/postgresql/master.pp | 2 +- modules/role/manifests/etherpad.pp | 1 + 6 files changed, 24 insertions(+), 9 deletions(-) (limited to 'modules') diff --git a/modules/base_installation/manifests/package_managers.pp b/modules/base_installation/manifests/package_managers.pp index a03085d..f4df186 100644 --- a/modules/base_installation/manifests/package_managers.pp +++ b/modules/base_installation/manifests/package_managers.pp @@ -15,15 +15,22 @@ class base_installation::package_managers inherits base_installation { pacman::repo { 
'multilib': order => 15, - include => '/etc/pacman.d/mirrorlist' + include => '/etc/pacman.d/mirrorlist', } pacman::repo { 'immae': order => 0, server => 'https://git.immae.eu/releases/packages/', - siglevel => 'Optional' + siglevel => 'Optional', } + exec { "refresh pacman": + command => "/usr/bin/pacman -Sy", + refreshonly => true, + } + + Concat["/etc/pacman.conf"] ~> Exec["refresh pacman"] -> Package <| name != "pacman" |> + class { 'aur': } contain "pacman" diff --git a/modules/profile/files/monitoring/check_postgres_replication b/modules/profile/files/monitoring/check_postgres_replication index 163c68a..a550077 100644 --- a/modules/profile/files/monitoring/check_postgres_replication +++ b/modules/profile/files/monitoring/check_postgres_replication @@ -13,23 +13,23 @@ lag=$(psql -h $host -p $port -A -t -c "SELECT COALESCE(EXTRACT(EPOCH FROM replay exit_code=$? if [[ $exit_code -ne 0 ]]; then - echo "UNKNOWN:Impossible to run psql command" + echo "UNKNOWN - Impossible to run psql command" exit $STATE_UNKNOWN elif [[ -z "$lag" ]]; then - echo "UNKNOWN:No replication found for $user" + echo "UNKNOWN - No replication found for $user" exit $STATE_UNKNOWN else output="Replication lag for $user is ${lag}s" LC_ALL=C lag=$(printf "%.*f" 0 $lag) if [[ $lag -lt 5 ]]; then - echo "OK:$output" + echo "OK - $output" exit $STATE_OK elif [[ $lag -lt 10 ]]; then - echo "WARNING:$output" + echo "WARNING - $output" exit $STATE_WARNING else - echo "CRITICAL:$output" + echo "CRITICAL - $output" exit $STATE_CRITICAL fi fi diff --git a/modules/profile/manifests/monitoring.pp b/modules/profile/manifests/monitoring.pp index beabe70..7df4ce9 100644 --- a/modules/profile/manifests/monitoring.pp +++ b/modules/profile/manifests/monitoring.pp @@ -28,6 +28,13 @@ class profile::monitoring ( content => template("profile/monitoring/naemon.cfg.erb"), } -> + file { "/etc/naemon/objects.cfg": + ensure => "file", + owner => "naemon", + group => "naemon", + mode => "0600" + } + -> service { 
"naemon": ensure => "running", enable => true, diff --git a/modules/profile/manifests/postgresql.pp b/modules/profile/manifests/postgresql.pp index edd6ea6..fedbcb1 100644 --- a/modules/profile/manifests/postgresql.pp +++ b/modules/profile/manifests/postgresql.pp @@ -30,7 +30,7 @@ class profile::postgresql ( @profile::monitoring::local_service { "Databases are present in postgresql": sudos => { - "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace" + "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\\ nspname\\ from\\ pg_catalog.pg_namespace" }, local => { check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace'!public!-r postgres", diff --git a/modules/profile/manifests/postgresql/master.pp b/modules/profile/manifests/postgresql/master.pp index e775eb4..99ac4c4 100644 --- a/modules/profile/manifests/postgresql/master.pp +++ b/modules/profile/manifests/postgresql/master.pp @@ -66,7 +66,7 @@ define profile::postgresql::master ( }, local => { - check_command => "check_postgresql_replication!$backup_host!/run/postgresql/!5432", + check_command => "check_postgresql_replication!$backup_host!/run/postgresql!5432", } } } diff --git a/modules/role/manifests/etherpad.pp b/modules/role/manifests/etherpad.pp index 5ab5023..1a86389 100644 --- a/modules/role/manifests/etherpad.pp +++ b/modules/role/manifests/etherpad.pp @@ -22,6 +22,7 @@ class role::etherpad ( include "profile::tools" include "profile::postgresql" include "profile::apache" + include "profile::monitoring" ensure_packages(["npm"]) ensure_packages(["abiword"]) -- cgit v1.2.3 From bff7d1b743bc2239d7ce7ced37dd03e4e846c03d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Bouya?= Date: Tue, 10 Jul 2018 10:45:04 +0200 Subject: Add monitoring for backup --- modules/profile/manifests/fstab.pp | 6 ++++++ 
modules/profile/manifests/monitoring/local_service.pp | 8 ++++---- modules/profile/manifests/postgresql/backup_dump.pp | 6 +++--- modules/role/manifests/backup.pp | 1 + 4 files changed, 14 insertions(+), 7 deletions(-) (limited to 'modules') diff --git a/modules/profile/manifests/fstab.pp b/modules/profile/manifests/fstab.pp index 8ecfe72..3af316e 100644 --- a/modules/profile/manifests/fstab.pp +++ b/modules/profile/manifests/fstab.pp @@ -15,6 +15,12 @@ class profile::fstab ( device => "UUID=${infos[1]}", fstype => $infos[2] } + + @profile::monitoring::local_service { "Size on ${infos[0]} partition": + local => { + check_command => "check_local_disk!10%!5%!${infos[0]}", + }; + } } } } diff --git a/modules/profile/manifests/monitoring/local_service.pp b/modules/profile/manifests/monitoring/local_service.pp index 0caf72e..9af7c7a 100644 --- a/modules/profile/manifests/monitoring/local_service.pp +++ b/modules/profile/manifests/monitoring/local_service.pp @@ -46,10 +46,10 @@ define profile::monitoring::local_service ( }) $sudos.each |$sudo_name, $content| { - sudo::conf { $sudo_name: - content => $content, - before => Nagios_service[$service_description], - } + ensure_resource("sudo::conf", $sudo_name, { + content => $content, + before => Nagios_service[$service_description], + }) } [true, false].each |$services_for_master| { diff --git a/modules/profile/manifests/postgresql/backup_dump.pp b/modules/profile/manifests/postgresql/backup_dump.pp index 5e469c5..5f456ed 100644 --- a/modules/profile/manifests/postgresql/backup_dump.pp +++ b/modules/profile/manifests/postgresql/backup_dump.pp @@ -60,10 +60,10 @@ define profile::postgresql::backup_dump ( @profile::monitoring::local_service { "Last postgresql dump in $pg_backup_path is not too old": sudos => { - "naemon-postgresql-dumps" => "naemon ALL=($pg_user) NOPASSWD: /usr/bin/find $pg_backup_path -mindepth 1 -maxdepth 1 -printf %T@\\n", - } + "naemon-postgresql-dumps-$pg_host" => "naemon ALL=($pg_user) NOPASSWD: 
/usr/bin/find $pg_backup_path -mindepth 1 -maxdepth 1 -printf %T@?n", + }, local => { - check_command => "check_last_file_date!$pg_backup_path!10!$pg_user" + check_command => "check_last_file_date!$pg_backup_path!10!$pg_user", } } } diff --git a/modules/role/manifests/backup.pp b/modules/role/manifests/backup.pp index b35c542..c0672c6 100644 --- a/modules/role/manifests/backup.pp +++ b/modules/role/manifests/backup.pp @@ -14,6 +14,7 @@ class role::backup ( include "profile::xmr_stak" include "profile::known_hosts" include "profile::boinc" + include "profile::monitoring" include "role::backup::postgresql" -- cgit v1.2.3 From 0363f38e186b8c942a6db2de4974befbc373f618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Bouya?= Date: Wed, 11 Jul 2018 07:45:22 +0200 Subject: Refactor a bit monitoring profile --- modules/profile/manifests/monitoring.pp | 4 +- modules/profile/manifests/monitoring/commands.pp | 14 +++---- modules/profile/manifests/monitoring/contacts.pp | 4 +- modules/profile/manifests/monitoring/hosts.pp | 6 +-- .../profile/manifests/monitoring/local_service.pp | 47 +++++----------------- modules/profile/manifests/monitoring/params.pp | 37 +++++++++++++++++ modules/profile/manifests/monitoring/times.pp | 4 +- .../profile/templates/monitoring/naemon.cfg.erb | 2 +- .../profile/templates/monitoring/resource.cfg.erb | 2 +- 9 files changed, 66 insertions(+), 54 deletions(-) create mode 100644 modules/profile/manifests/monitoring/params.pp (limited to 'modules') diff --git a/modules/profile/manifests/monitoring.pp b/modules/profile/manifests/monitoring.pp index 7df4ce9..8633626 100644 --- a/modules/profile/manifests/monitoring.pp +++ b/modules/profile/manifests/monitoring.pp @@ -1,7 +1,7 @@ class profile::monitoring ( Optional[String] $naemon_url = undef, Optional[String] $naemon_token = undef, -) { +) inherits profile::monitoring::params { ensure_packages(["naemon", "cnagios"]) file { "/etc/naemon": @@ -28,7 +28,7 @@ class profile::monitoring ( content => 
template("profile/monitoring/naemon.cfg.erb"), } -> - file { "/etc/naemon/objects.cfg": + file { $objects: ensure => "file", owner => "naemon", group => "naemon", diff --git a/modules/profile/manifests/monitoring/commands.pp b/modules/profile/manifests/monitoring/commands.pp index 4ccc816..1c8d0b4 100644 --- a/modules/profile/manifests/monitoring/commands.pp +++ b/modules/profile/manifests/monitoring/commands.pp @@ -1,7 +1,7 @@ class profile::monitoring::commands inherits profile::monitoring { ensure_packages(["monitoring-plugins"]) - file { "/etc/naemon/monitoring-plugins": + file { $plugins: ensure => "directory", owner => "root", group => "naemon", @@ -14,7 +14,7 @@ class profile::monitoring::commands inherits profile::monitoring { "check_postgres_replication", "check_last_file_date", ].each |$file| { - file { "/etc/naemon/monitoring-plugins/$file": + file { "$plugins/$file": ensure => "present", owner => "root", group => "naemon", @@ -27,7 +27,7 @@ class profile::monitoring::commands inherits profile::monitoring { ensure => "present", owner => "naemon", group => "naemon", - target => "/etc/naemon/objects.cfg", + target => $objects, notify => Service["naemon"], before => Service["naemon"], require => File["/etc/naemon"], @@ -48,16 +48,16 @@ class profile::monitoring::commands inherits profile::monitoring { command_line => '$USER1$/check_ntp_time -H 0.arch.pool.ntp.org'; "check_md_raid": command_line => '$USER2$/check_md_raid', - require => File["/etc/naemon/monitoring-plugins/check_md_raid"]; + require => File["$plugins/check_md_raid"]; "check_command_output": command_line => '$USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$', - require => File["/etc/naemon/monitoring-plugins/check_command"]; + require => File["$plugins/check_command"]; "check_postgresql_replication": command_line => '/usr/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$"', - require => File["/etc/naemon/monitoring-plugins/check_postgres_replication"]; + 
require => File["$plugins/check_postgres_replication"]; "check_last_file_date": command_line => '$USER2$/check_last_file_date "$ARG1$" "$ARG2$" "$ARG3$"', - require => File["/etc/naemon/monitoring-plugins/check_last_file_date"], + require => File["$plugins/check_last_file_date"], } unless empty($naemon_url) { diff --git a/modules/profile/manifests/monitoring/contacts.pp b/modules/profile/manifests/monitoring/contacts.pp index 107af18..a751153 100644 --- a/modules/profile/manifests/monitoring/contacts.pp +++ b/modules/profile/manifests/monitoring/contacts.pp @@ -1,9 +1,9 @@ -class profile::monitoring::contacts { +class profile::monitoring::contacts inherits profile::monitoring::params { Nagios_contactgroup { ensure => "present", owner => "naemon", group => "naemon", - target => "/etc/naemon/objects.cfg", + target => $objects, notify => Service["naemon"], before => Service["naemon"], require => File["/etc/naemon"], diff --git a/modules/profile/manifests/monitoring/hosts.pp b/modules/profile/manifests/monitoring/hosts.pp index 3c8bc73..f7802be 100644 --- a/modules/profile/manifests/monitoring/hosts.pp +++ b/modules/profile/manifests/monitoring/hosts.pp @@ -1,11 +1,11 @@ -class profile::monitoring::hosts { +class profile::monitoring::hosts inherits profile::monitoring::params { $real_hostname = lookup("base_installation::real_hostname") Nagios_hostgroup { ensure => "present", owner => "naemon", group => "naemon", - target => "/etc/naemon/objects.cfg", + target => $objects, notify => Service["naemon"], before => Service["naemon"], require => File["/etc/naemon"], @@ -15,7 +15,7 @@ class profile::monitoring::hosts { ensure => "present", owner => "naemon", group => "naemon", - target => "/etc/naemon/objects.cfg", + target => $objects, notify => Service["naemon"], before => Service["naemon"], require => File["/etc/naemon"], diff --git a/modules/profile/manifests/monitoring/local_service.pp b/modules/profile/manifests/monitoring/local_service.pp index 9af7c7a..9544fdf 100644 
--- a/modules/profile/manifests/monitoring/local_service.pp +++ b/modules/profile/manifests/monitoring/local_service.pp @@ -4,8 +4,9 @@ define profile::monitoring::local_service ( Optional[Hash] $master = {}, Optional[Hash] $local = {}, ) { + include profile::monitoring::params + $service_description = $title - $real_hostname = lookup("base_installation::real_hostname") Nagios_service { ensure => "present", @@ -16,35 +17,6 @@ define profile::monitoring::local_service ( require => File["/etc/naemon"], } - $service_generic = { - active_checks_enabled => "1", - check_freshness => "0", - check_interval => "10", - check_period => "24x7", - contact_groups => "admins", - event_handler_enabled => "1", - flap_detection_enabled => "1", - is_volatile => "0", - max_check_attempts => "3", - notification_interval => "60", - notification_options => "w,u,c,r", - notification_period => "24x7", - notifications_enabled => "0", - obsess_over_service => "1", - passive_checks_enabled => "1", - process_perf_data => "1", - retain_nonstatus_information => "1", - retain_status_information => "1", - retry_interval => "2", - } - - $service_local = merge($service_generic, { - host_name => $real_hostname, - check_interval => "5", - max_check_attempts => "4", - retry_interval => "1", - }) - $sudos.each |$sudo_name, $content| { ensure_resource("sudo::conf", $sudo_name, { content => $content, @@ -56,17 +28,20 @@ define profile::monitoring::local_service ( if $services_for_master { $default_local = { mode => "0644", - target => "/etc/naemon/services_for_master.cfg", - check_interval => $service_local["check_interval"], - retry_interval => $service_local["retry_interval"], - host_name => $service_local["host_name"], - use => "external-service", + target => $::profile::monitoring::params::services_for_master, + check_interval => $::profile::monitoring::params::service_local["check_interval"], + retry_interval => $::profile::monitoring::params::service_local["retry_interval"], + host_name => 
$::profile::monitoring::params::service_local["host_name"], + use => "external-passive-service", notify => [], } $t = "master - " $services_key = $master } else { - $default_local = merge($service_local, { target => "/etc/naemon/objects.cfg" }) + $default_local = merge( + $::profile::monitoring::params::service_local, + { target => $::profile::monitoring::params::objects } + ) $t = "" $services_key = $local } diff --git a/modules/profile/manifests/monitoring/params.pp b/modules/profile/manifests/monitoring/params.pp new file mode 100644 index 0000000..a647130 --- /dev/null +++ b/modules/profile/manifests/monitoring/params.pp @@ -0,0 +1,37 @@ +class profile::monitoring::params { + $real_hostname = lookup("base_installation::real_hostname") + + $services_for_master = "/etc/naemon/services_for_master.cfg" + $objects = "/etc/naemon/objects.cfg" + $plugins = "/etc/naemon/monitoring-plugins" + + $service_generic = { + active_checks_enabled => "1", + check_freshness => "0", + check_interval => "10", + check_period => "24x7", + contact_groups => "admins", + event_handler_enabled => "1", + flap_detection_enabled => "1", + is_volatile => "0", + max_check_attempts => "3", + notification_interval => "60", + notification_options => "w,u,c,r", + notification_period => "24x7", + notifications_enabled => "0", + obsess_over_service => "1", + passive_checks_enabled => "1", + process_perf_data => "1", + retain_nonstatus_information => "1", + retain_status_information => "1", + retry_interval => "2", + } + + $service_local = merge($service_generic, { + host_name => $real_hostname, + check_interval => "5", + max_check_attempts => "4", + retry_interval => "1", + }) + +} diff --git a/modules/profile/manifests/monitoring/times.pp b/modules/profile/manifests/monitoring/times.pp index 25bf86b..42f5d9c 100644 --- a/modules/profile/manifests/monitoring/times.pp +++ b/modules/profile/manifests/monitoring/times.pp @@ -1,9 +1,9 @@ -class profile::monitoring::times { +class 
profile::monitoring::times inherits profile::monitoring::params { Nagios_timeperiod { ensure => "present", owner => "naemon", group => "naemon", - target => "/etc/naemon/objects.cfg", + target => $objects, notify => Service["naemon"], before => Service["naemon"], require => File["/etc/naemon"], diff --git a/modules/profile/templates/monitoring/naemon.cfg.erb b/modules/profile/templates/monitoring/naemon.cfg.erb index 78bada3..bacbe04 100644 --- a/modules/profile/templates/monitoring/naemon.cfg.erb +++ b/modules/profile/templates/monitoring/naemon.cfg.erb @@ -26,7 +26,7 @@ log_file=/var/log/naemon/naemon.log # if you wish (as shown below), or keep them all in a single config file. # You can specify individual object config files as shown below: -cfg_file=/etc/naemon/objects.cfg +cfg_file=<%= @objects %> #cfg_file=/etc/naemon/objects/commands.cfg #cfg_file=/etc/naemon/objects/contacts.cfg #cfg_file=/etc/naemon/objects/timeperiods.cfg diff --git a/modules/profile/templates/monitoring/resource.cfg.erb b/modules/profile/templates/monitoring/resource.cfg.erb index 7da5e66..5a5c3ee 100644 --- a/modules/profile/templates/monitoring/resource.cfg.erb +++ b/modules/profile/templates/monitoring/resource.cfg.erb @@ -20,7 +20,7 @@ # Sets $USER1$ to be the path to the plugins $USER1$=/usr/lib/monitoring-plugins -$USER2$=/etc/naemon/monitoring-plugins +$USER2$=<%= @plugins %> # Sets $USER2$ to be the path to event handlers #$USER2$=/usr/lib/monitoring-plugins/eventhandlers -- cgit v1.2.3 From 0cdf494080b285e81b531a38dbbac7f7d8e9c990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Bouya?= Date: Wed, 11 Jul 2018 08:43:05 +0200 Subject: Monitor websites from master --- modules/profile/manifests/monitoring/external_service.pp | 16 ++++++++++++++++ modules/profile/manifests/monitoring/local_service.pp | 2 +- modules/profile/manifests/monitoring/params.pp | 5 +++++ modules/profile/manifests/monitoring/services.pp | 1 + modules/role/manifests/etherpad.pp | 13 +++++++++++++ 5 files 
changed, 36 insertions(+), 1 deletion(-) create mode 100644 modules/profile/manifests/monitoring/external_service.pp (limited to 'modules') diff --git a/modules/profile/manifests/monitoring/external_service.pp b/modules/profile/manifests/monitoring/external_service.pp new file mode 100644 index 0000000..027dad8 --- /dev/null +++ b/modules/profile/manifests/monitoring/external_service.pp @@ -0,0 +1,16 @@ +define profile::monitoring::external_service ( + Optional[String] $type = undef, + Optional[Hash] $master = {}, +) { + include profile::monitoring::params + $service_description = $title + + nagios_service { $service_description: + service_description => $service_description, + host_name => $::profile::monitoring::params::service_local["host_name"], + use => $::profile::monitoring::params::service_types[$type], + target => $::profile::monitoring::params::services_for_master, + * => $master, + } + +} diff --git a/modules/profile/manifests/monitoring/local_service.pp b/modules/profile/manifests/monitoring/local_service.pp index 9544fdf..1f975f0 100644 --- a/modules/profile/manifests/monitoring/local_service.pp +++ b/modules/profile/manifests/monitoring/local_service.pp @@ -32,7 +32,7 @@ define profile::monitoring::local_service ( check_interval => $::profile::monitoring::params::service_local["check_interval"], retry_interval => $::profile::monitoring::params::service_local["retry_interval"], host_name => $::profile::monitoring::params::service_local["host_name"], - use => "external-passive-service", + use => $::profile::monitoring::params::service_types["passive"], notify => [], } $t = "master - " diff --git a/modules/profile/manifests/monitoring/params.pp b/modules/profile/manifests/monitoring/params.pp index a647130..27f895e 100644 --- a/modules/profile/manifests/monitoring/params.pp +++ b/modules/profile/manifests/monitoring/params.pp @@ -5,6 +5,11 @@ class profile::monitoring::params { $objects = "/etc/naemon/objects.cfg" $plugins = 
"/etc/naemon/monitoring-plugins" + $service_types = { + "passive" => "external-passive-service", + "web" => "external-web-service", + } + $service_generic = { active_checks_enabled => "1", check_freshness => "0", diff --git a/modules/profile/manifests/monitoring/services.pp b/modules/profile/manifests/monitoring/services.pp index b20a352..95c6efb 100644 --- a/modules/profile/manifests/monitoring/services.pp +++ b/modules/profile/manifests/monitoring/services.pp @@ -38,4 +38,5 @@ class profile::monitoring::services { } Profile::Monitoring::Local_service <| |> + Profile::Monitoring::External_service <| |> } diff --git a/modules/role/manifests/etherpad.pp b/modules/role/manifests/etherpad.pp index 1a86389..119af56 100644 --- a/modules/role/manifests/etherpad.pp +++ b/modules/role/manifests/etherpad.pp @@ -122,4 +122,17 @@ class role::etherpad ( proxy_preserve_host => true; default: * => $::profile::apache::apache_vhost_default; } + + @profile::monitoring::external_service { "Etherpad service is running on $web_host": + type => "web", + master => { + check_command => "check_https!$web_host!/!Etherpad" + } + } + @profile::monitoring::external_service { "$web_host ssl certificate is up to date": + type => "web", + master => { + check_command => "check_https_certificate!$web_host" + } + } } -- cgit v1.2.3 From d8bc769648c1528f5d749deee060d70e326ef431 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Bouya?= <ismael.bouya@normalesup.org> Date: Wed, 11 Jul 2018 09:11:42 +0200 Subject: Monitor rsync backups --- modules/profile/manifests/postgresql/backup_dump.pp | 2 +- modules/role/manifests/backup.pp | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'modules') diff --git a/modules/profile/manifests/postgresql/backup_dump.pp b/modules/profile/manifests/postgresql/backup_dump.pp index 5f456ed..e247cf0 100644 --- a/modules/profile/manifests/postgresql/backup_dump.pp +++ b/modules/profile/manifests/postgresql/backup_dump.pp @@ -63,7 +63,7 @@ define 
profile::postgresql::backup_dump ( "naemon-postgresql-dumps-$pg_host" => "naemon ALL=($pg_user) NOPASSWD: /usr/bin/find $pg_backup_path -mindepth 1 -maxdepth 1 -printf %T@?n", }, local => { - check_command => "check_last_file_date!$pg_backup_path!10!$pg_user", + check_command => "check_last_file_date!$pg_backup_path!7!$pg_user", } } } diff --git a/modules/role/manifests/backup.pp b/modules/role/manifests/backup.pp index c0672c6..6b8d00c 100644 --- a/modules/role/manifests/backup.pp +++ b/modules/role/manifests/backup.pp @@ -125,5 +125,11 @@ class role::backup ( order => "$order_dirname-$order_part", } } + + @profile::monitoring::local_service { "Last backup in $base is not too old": + local => { + check_command => "check_last_file_date!$base!14", + } + } } } -- cgit v1.2.3