aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ismaël Bouya <ismael.bouya@normalesup.org> 2018-07-11 09:31:24 +0200
committer: Ismaël Bouya <ismael.bouya@normalesup.org> 2018-07-11 09:31:24 +0200
commit: 2bdbb0213a0f70705f81ac2eaf8349668b2c29b4 (patch)
tree: 06b1e40db99815133dcc40e3e68531d45e1bd228
parent: 25c99a635507abfe6af4a1f0a9fc5a103d1880c0 (diff)
parent: d8bc769648c1528f5d749deee060d70e326ef431 (diff)
downloadPuppet-2bdbb0213a0f70705f81ac2eaf8349668b2c29b4.tar.gz
Puppet-2bdbb0213a0f70705f81ac2eaf8349668b2c29b4.tar.zst
Puppet-2bdbb0213a0f70705f81ac2eaf8349668b2c29b4.zip
Merge branch 'monitoring' into dev
-rw-r--r--modules/base_installation/lib/puppet/provider/package/pacman.rb283
-rw-r--r--modules/base_installation/lib/puppet/provider/package/pip2.rb17
-rw-r--r--modules/base_installation/manifests/package_managers.pp15
-rw-r--r--modules/profile/files/monitoring/check_command113
-rw-r--r--modules/profile/files/monitoring/check_last_file_date31
-rw-r--r--modules/profile/files/monitoring/check_md_raid32
-rw-r--r--modules/profile/files/monitoring/check_postgres_replication35
-rw-r--r--modules/profile/manifests/fstab.pp6
-rw-r--r--modules/profile/manifests/monitoring.pp58
-rw-r--r--modules/profile/manifests/monitoring/commands.pp70
-rw-r--r--modules/profile/manifests/monitoring/contacts.pp16
-rw-r--r--modules/profile/manifests/monitoring/external_service.pp16
-rw-r--r--modules/profile/manifests/monitoring/hosts.pp45
-rw-r--r--modules/profile/manifests/monitoring/local_service.pp56
-rw-r--r--modules/profile/manifests/monitoring/params.pp42
-rw-r--r--modules/profile/manifests/monitoring/services.pp42
-rw-r--r--modules/profile/manifests/monitoring/times.pp23
-rw-r--r--modules/profile/manifests/postgresql.pp8
-rw-r--r--modules/profile/manifests/postgresql/backup_dump.pp9
-rw-r--r--modules/profile/manifests/postgresql/backup_pgbouncer.pp10
-rw-r--r--modules/profile/manifests/postgresql/master.pp10
-rw-r--r--modules/profile/manifests/postgresql/ssl.pp1
-rw-r--r--modules/profile/templates/monitoring/naemon.cfg.erb1038
-rw-r--r--modules/profile/templates/monitoring/resource.cfg.erb30
-rwxr-xr-xmodules/profile/templates/monitoring/send_nrdp.sh.erb271
-rw-r--r--modules/role/manifests/backup.pp7
-rw-r--r--modules/role/manifests/caldance.pp1
-rw-r--r--modules/role/manifests/cryptoportfolio.pp1
-rw-r--r--modules/role/manifests/etherpad.pp14
-rw-r--r--readme.md31
-rwxr-xr-xscripts/ovh_cloud_instance/arch_host_script.sh4
-rwxr-xr-xscripts/ovh_vps_ssd/arch_chroot_script.sh4
32 files changed, 2332 insertions, 7 deletions
diff --git a/modules/base_installation/lib/puppet/provider/package/pacman.rb b/modules/base_installation/lib/puppet/provider/package/pacman.rb
new file mode 100644
index 0000000..0a5e5d0
--- /dev/null
+++ b/modules/base_installation/lib/puppet/provider/package/pacman.rb
@@ -0,0 +1,283 @@
1require 'puppet/provider/package'
2require 'set'
3require 'uri'
4
5Puppet::Type.type(:package).provide :pacman, :parent => Puppet::Provider::Package do
6 desc "Support for the Package Manager Utility (pacman) used in Archlinux.
7
8 This provider supports the `install_options` attribute, which allows command-line flags to be passed to pacman.
9 These options should be specified as a string (e.g. '--flag'), a hash (e.g. {'--flag' => 'value'}),
10 or an array where each element is either a string or a hash."
11
# Reports whether the aura AUR helper is present at /usr/bin/aura.
#
# The answer is memoized, including a negative one: `@aura ||= ...` never
# caches `false`, so the previous form queried the filesystem again on every
# call whenever aura was absent.
#
# @return [Boolean] result of the first existence check
def self.aura?
  @aura = Puppet::FileSystem.exist?('/usr/bin/aura') if @aura.nil?
  @aura
end
16
17 commands :pacman => "/usr/bin/pacman"
18 # Aura is a common AUR helper which, if installed, we can use to query the AUR
19 commands :aura => "/usr/bin/aura" if aura?
20
21 confine :operatingsystem => [:archlinux, :manjarolinux]
22 defaultfor :operatingsystem => [:archlinux, :manjarolinux]
23 has_feature :install_options
24 has_feature :uninstall_options
25 has_feature :upgradeable
26 has_feature :virtual_packages
27
# Tells whether +name+ designates a pacman package group, i.e. whether
# `pacman -Sg name` succeeds and prints something.
def self.group?(name)
  listing = pacman("-Sg", name)
  !listing.empty?
rescue Puppet::ExecutionFailure
  # A non-zero exit from pacman is the expected answer when the name is not a group
  false
end
37
# Installs the resource from its `source` when one is given, from the
# repositories (pacman, or aura when applicable) otherwise, then verifies
# through #query that the package really ended up installed.
def install
  @resource[:source] ? install_from_file : install_from_repo

  return if self.query
  fail(_("Could not find package '%{name}'") % { name: @resource[:name] })
end
52
# Lists every installed package followed by every fully installed package
# group as provider instances.
# A group only counts as installed while all of its packages are installed:
# when a group gains packages over time it stops being reported, so the new
# packages get installed on the next run. A group losing packages changes
# nothing, which is intended.
def self.instances
  installed = get_installed_packages

  package_instances = installed.sort_by { |name, _| name }.map do |name, version|
    new(to_resource_hash(name, version))
  end

  group_instances = get_installed_groups(installed).map do |name, version|
    new(to_resource_hash(name, version))
  end

  package_instances + group_instances
end
73
# Parses `pacman -Q` into a hash of package name => installed version.
# Lines that do not look like 'packagename version-rel' are reported with a
# warning and skipped; a failing pacman call is turned into a provider failure.
def self.get_installed_packages
  line_format = %r{^(\S+)\s(\S+)}
  found = {}
  execpipe([command(:pacman), "-Q"]) do |pipe|
    pipe.each_line do |line|
      parsed = line_format.match(line)
      if parsed
        name, version = parsed.captures
        found[name] = version
      else
        warning(_("Failed to match line '%{line}'") % { line: line })
      end
    end
  end
  found
rescue Puppet::ExecutionFailure
  fail(_("Error getting installed packages"))
end
94
# Builds a hash of group name => version string for the groups whose
# packages are all present in +installed_packages+. The "version" of a group
# is the sorted, comma separated list of its "package version" pairs.
# When +filter+ is given only that name is queried, and a pacman failure is
# swallowed because pacman exits non-zero for a name that is not a group.
def self.get_installed_groups(installed_packages, filter = nil)
  groups = {}
  begin
    # Collect group name => list of member packages from `pacman -Sgg`
    argv = [command(:pacman), "-Sgg"]
    argv.push(filter) if filter
    execpipe(argv) do |pipe|
      pipe.each_line do |line|
        name, package = line.split
        (groups[name] ||= []) << package
      end
    end

    # Keep only the groups that are completely installed
    groups.keep_if do |_, members|
      members.all? { |member| installed_packages[member] }
    end

    # Swap each member list for the synthetic group version string
    groups.each do |name, members|
      described = members.sort.map { |member| "#{member} #{installed_packages[member]}" }
      groups[name] = described.join ', '
    end
  rescue Puppet::ExecutionFailure
    # Only an unfiltered query is expected to always succeed
    raise unless filter
  end
  groups
end
125
# Arch Linux is a rolling release, so an update must always end on the newest
# release; pacman's install operation does exactly that for an installed
# package, hence updating just installs again.
def update
  install
end
132
# Returns the newest available version of the resource after refreshing the
# sync database. For a group the result is the sorted "name version" list of
# its members. A plain package is looked up with pacman first; when that
# fails and aura is installed, the AUR is queried through aura instead.
def latest
  # Synchronize the database
  pacman "-Sy"

  target = @resource[:name]

  if self.class.group?(target)
    members = pacman("-Sp", "--print-format", "%n %v", target).lines.map { |line| line.chomp }
    return members.sort.join(', ')
  end

  use_pacman = true
  begin
    return pacman("-Sp", "--print-format", "%v", target).chomp if use_pacman

    # Second attempt: read the Version field of aura's package info
    aura("-Ai", target).split("\n").each do |line|
      fields = line.split
      return fields[2].chomp if fields[0] =~ /Version/
    end
  rescue Puppet::ExecutionFailure
    raise unless use_pacman && self.class.aura?
    use_pacman = false # now try the AUR
    retry
  end
end
165
# Looks the resource up among installed groups first, then among installed
# packages, and returns its resource hash, or nil when it is not installed.
# A group match is refused (nil, with a warning) unless the resource allows
# virtual packages.
def query
  installed = self.class.get_installed_packages
  target = @resource[:name]

  group_version = self.class.get_installed_groups(installed, target)[target]
  if group_version && !@resource.allow_virtual?
    warning(_("%{resource_name} is a group, but allow_virtual is false.") % { resource_name: target })
    return nil
  end

  version = group_version || installed[target]

  # Neither a group nor a package of that name is installed
  return nil unless version

  self.class.to_resource_hash(target, version)
end
188
# Wraps a name and its version into the hash used to build provider
# instances; the version is stored under :ensure.
def self.to_resource_hash(name, version)
  { name: name, ensure: version, provider: self.name }
end
196
# Removes the package, or the whole group (with `-s`) when the resource is a
# group and virtual packages are allowed. The command goes through aura when
# it is installed, through pacman otherwise.
def uninstall
  target = @resource[:name]
  group = self.class.group?(target)

  if group && !@resource.allow_virtual?
    fail(_("Refusing to uninstall package group %{resource_name}, because allow_virtual is false.") % { resource_name: target })
  end

  argv = ["--noconfirm", "--noprogressbar"]
  argv.concat(uninstall_options) if @resource[:uninstall_options]
  argv.push("-R")
  argv.push('-s') if group
  argv.push(target)

  self.class.aura? ? aura(*argv) : pacman(*argv)
end
217
218 private
219
# True when aura is installed and pacman itself cannot resolve the resource
# (`pacman -Sp` fails), meaning the package has to come from the AUR.
def install_with_aura?
  target = @resource[:name]
  return false unless self.class.aura?

  begin
    pacman "-Sp", target
    false
  rescue Puppet::ExecutionFailure
    true
  end
end
233
# The resource's install_options passed through join_options.
def install_options
  requested = @resource[:install_options]
  join_options(requested)
end
237
# The resource's uninstall_options passed through join_options.
def uninstall_options
  requested = @resource[:uninstall_options]
  join_options(requested)
end
241
# Installs the package file named by the resource's `source` with
# `pacman -U`, after refreshing the sync database. Plain paths, http(s) and
# ftp URLs are handed to pacman unchanged, file URLs are reduced to their
# path; puppet URLs and any other scheme are rejected.
def install_from_file
  raw_source = @resource[:source]
  begin
    parsed = URI.parse raw_source
  rescue => detail
    self.fail Puppet::Error, _("Invalid source '%{source}': %{detail}") % { source: raw_source, detail: detail }, detail
  end

  package_file =
    case parsed.scheme
    when nil, /https?/i, /ftp/i
      raw_source
    when /file/i
      parsed.path
    when /puppet/i
      fail _("puppet:// URL is not supported by pacman")
    else
      fail _("Source %{source} is not supported by pacman") % { source: raw_source }
    end

  pacman "--noconfirm", "--noprogressbar", "-Sy"
  pacman "--noconfirm", "--noprogressbar", "-U", package_file
end
263
# Installs the resource by name: with `aura -Aq` when the package has to
# come from the AUR, with `pacman -Sy` otherwise. Groups are refused unless
# the resource allows virtual packages.
def install_from_repo
  target = @resource[:name]

  if self.class.group?(target) && !@resource.allow_virtual?
    fail(_("Refusing to install package group %{resource_name}, because allow_virtual is false.") % { resource_name: target })
  end

  argv = ["--noconfirm", "--needed"]
  argv.concat(install_options) if @resource[:install_options]

  if install_with_aura?
    aura(*argv, "-Aq", target)
  else
    pacman(*argv, "--noprogressbar", "-Sy", target)
  end
end
282
283end
diff --git a/modules/base_installation/lib/puppet/provider/package/pip2.rb b/modules/base_installation/lib/puppet/provider/package/pip2.rb
new file mode 100644
index 0000000..27cc0c4
--- /dev/null
+++ b/modules/base_installation/lib/puppet/provider/package/pip2.rb
@@ -0,0 +1,17 @@
1require 'puppet/provider/package/pip'
2
# Package provider identical to its :pip parent except that it drives the
# `pip2` executable.
Puppet::Type.type(:package).provide(:pip2, :parent => :pip) do
  desc "Python packages via `pip2`.

  This provider supports the `install_options` attribute, which allows command-line flags to be passed to pip2.
  These options should be specified as a string (e.g. '--flag'), a hash (e.g. {'--flag' => 'value'}),
  or an array where each element is either a string or a hash."

  has_feature :installable, :uninstallable, :upgradeable, :versionable, :install_options

  # Candidate executable names for this provider.
  def self.cmd
    %w[pip2]
  end
end
diff --git a/modules/base_installation/manifests/package_managers.pp b/modules/base_installation/manifests/package_managers.pp
index c5c8485..f4df186 100644
--- a/modules/base_installation/manifests/package_managers.pp
+++ b/modules/base_installation/manifests/package_managers.pp
@@ -15,9 +15,22 @@ class base_installation::package_managers inherits base_installation {
15 15
16 pacman::repo { 'multilib': 16 pacman::repo { 'multilib':
17 order => 15, 17 order => 15,
18 include => '/etc/pacman.d/mirrorlist' 18 include => '/etc/pacman.d/mirrorlist',
19 } 19 }
20 20
21 pacman::repo { 'immae':
22 order => 0,
23 server => 'https://git.immae.eu/releases/packages/',
24 siglevel => 'Optional',
25 }
26
27 exec { "refresh pacman":
28 command => "/usr/bin/pacman -Sy",
29 refreshonly => true,
30 }
31
32 Concat["/etc/pacman.conf"] ~> Exec["refresh pacman"] -> Package <| name != "pacman" |>
33
21 class { 'aur': } 34 class { 'aur': }
22 35
23 contain "pacman" 36 contain "pacman"
diff --git a/modules/profile/files/monitoring/check_command b/modules/profile/files/monitoring/check_command
new file mode 100644
index 0000000..2c7eded
--- /dev/null
+++ b/modules/profile/files/monitoring/check_command
@@ -0,0 +1,113 @@
#!/usr/bin/perl
#
# Naemon/Nagios plugin: runs a command (-c) and checks exactly one of
#   - its exit status against an expected code (-s),
#   - its output against the output of another command (-C),
#   - its output against a pattern (-o).
# With none of -s/-C/-o given, an exit status of 0 is expected.
# -r USER runs the command(s) through `sudo -u USER -n`.

use strict;
use Getopt::Std;
$| = 1;

my %opts;
getopts('hr:C:c:s:o:', \%opts);

my $STATE_OK = 0;
my $STATE_WARNING = 1;
my $STATE_CRITICAL = 2;
my $STATE_UNKNOWN = 3;

# Calling the plugin without any option behaves like -h.
if ($opts{'h'} || scalar(%opts) == 0) {
  &print_help();
  exit($STATE_OK);
}

my $command = $opts{'c'};
if ($command eq '') {
  print "You must provide a command to check.\n";
  exit($STATE_UNKNOWN);
}

my $expected_output = $opts{'o'};
my $expected_status = $opts{'s'};
my $other_command = $opts{'C'};

if ($other_command eq '' and $expected_status eq '' and $expected_output eq '') {
  $expected_status = 0;
}

# stderr is merged into stdout so that sudo errors show up in the captured output.
my $cmd = $command . ' 2>&1';
my $other_cmd;
if ($other_command ne '') {
  $other_cmd = $other_command . ' 2>&1';
}

my $run_as;
if ($opts{'r'}) {
  $run_as = $opts{'r'};
  $cmd = "sudo -u $run_as -n $cmd";

  if ($other_command ne '') {
    $other_cmd = "sudo -u $run_as -n $other_cmd";
  }

}

my $cmd_result = `$cmd`;

# $? holds the raw wait status, not the exit code: the exit code is its high
# byte. It must also be read now, before the second command overwrites it.
my $cmd_status;
if ($? == -1) {
  $cmd_status = 127;                # the command could not be started
} elsif ($? & 127) {
  $cmd_status = 128 + ($? & 127);   # killed by a signal (shell convention)
} else {
  $cmd_status = $? >> 8;
}

my $other_cmd_result;
if ($other_command ne '') {
  $other_cmd_result = `$other_cmd`;
  chomp($other_cmd_result);
}

chomp($cmd_result);
# NOTE(review): any output mentioning "sudo" is treated as a sudo failure, and
# the message says CRITICAL while the exit state is UNKNOWN - kept as is.
if ($cmd_result =~ /sudo/i) {
  print "$command CRITICAL - No sudo right to run the command\n";
  exit($STATE_UNKNOWN);
} elsif ($expected_status ne '') {
  if ($cmd_status != $expected_status) {
    print "$command CRITICAL - Response status $cmd_status\n";
    exit($STATE_CRITICAL);
  } else {
    print "$command OK - Response status $cmd_status\n";
    exit($STATE_OK);
  }
} elsif ($other_command ne '') {
  if ($cmd_result ne $other_cmd_result) {
    print "$command CRITICAL - Expected output not matching other command output\n";
    exit($STATE_CRITICAL);
  } else {
    print "$command OK - Expected output matching other command output\n";
    exit($STATE_OK);
  }
} else {
  # The expected output is used as a regular expression.
  if ($cmd_result !~ /$expected_output/) {
    print "$command CRITICAL - Expected output not matching\n";
    exit($STATE_CRITICAL);
  } else {
    print "$command OK - Expected output matching\n";
    exit($STATE_OK);
  }
}
87
# Prints the plugin's usage text (the heredoc below) to standard output.
# Takes no argument; the caller decides the exit state afterwards.
88sub print_help() {
89 print << "EOF";
90Check whether the given command responds as expected. One of -o -C or -s must be selected.
91
92Options:
93-h
94 Print detailed help screen
95
96-c
97 command to run (required)
98
99-C
100 other command to compare output
101
102-r user
103 Run as user via sudo.
104
105-s
106 status code to check
107
108-o
109 output to check
110
111EOF
112}
113
diff --git a/modules/profile/files/monitoring/check_last_file_date b/modules/profile/files/monitoring/check_last_file_date
new file mode 100644
index 0000000..8eabb57
--- /dev/null
+++ b/modules/profile/files/monitoring/check_last_file_date
@@ -0,0 +1,31 @@
#!/bin/bash
#
# Naemon plugin: checks that the most recently modified entry located
# directly inside a directory is younger than a given number of hours.
#
# Usage: check_last_file_date BASE_PATH HOURS [AS_USER]
#   BASE_PATH  directory whose direct children are inspected
#   HOURS      maximum accepted age of the newest entry, in hours
#   AS_USER    optional user to run find as, through sudo

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

base_path=$1
hours=$2
as_user=$3

if [ -z "$base_path" ] || [ -z "$hours" ]; then
  echo "UNKNOWN: Usage: $0 base_path hours [as_user]"
  exit $STATE_UNKNOWN
fi

# %T@ prints fractional epoch seconds, hence the numeric sort.
# The path is quoted so that a directory containing spaces stays one argument.
if [ -z "$as_user" ]; then
  last_date=$(find "$base_path" -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort -n | tail -n 1)
else
  # -n: fail instead of prompting when no passwordless sudo rule matches
  last_date=$(sudo -n -u "$as_user" find "$base_path" -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort -n | tail -n 1)
fi

if [ -z "$last_date" ]; then
  echo "UNKNOWN: Could not read folder"
  exit $STATE_UNKNOWN
fi

# Round to whole seconds; the C locale is limited to this conversion so the
# '.' decimal separator is understood whatever the ambient locale is.
last_date=$(LC_ALL=C printf "%.0f" "$last_date")

min_date=$(date -d "$hours hours ago" "+%s" 2>/dev/null)
if [ -z "$min_date" ]; then
  echo "UNKNOWN: Invalid number of hours '$hours'"
  exit $STATE_UNKNOWN
fi

if [ "$min_date" -lt "$last_date" ]; then
  echo "OK: Last backup $(date -d "@$last_date")"
  exit $STATE_OK
else
  echo "CRITICAL: Last backup $(date -d "@$last_date")"
  exit $STATE_CRITICAL
fi
diff --git a/modules/profile/files/monitoring/check_md_raid b/modules/profile/files/monitoring/check_md_raid
new file mode 100644
index 0000000..9c79a7a
--- /dev/null
+++ b/modules/profile/files/monitoring/check_md_raid
@@ -0,0 +1,32 @@
#!/bin/bash
#
# Created by Sebastian Grewe, Jammicron Technology
#
# Naemon plugin reporting the state of the Linux software RAID (md) arrays
# listed in /proc/mdstat: WARNING while an array recovers, OK when no array
# is degraded, CRITICAL otherwise, UNKNOWN when the file cannot be read.

MDSTAT=/proc/mdstat

# Without mdstat the counts below would be empty and the check would report
# a misleading "CRITICAL ... have FAILED".
if [ ! -r "$MDSTAT" ]; then
  echo "UNKNOWN - Cannot read $MDSTAT"
  exit 3
fi

# Get count of raid arrays
RAID_DEVICES=$(grep -c '^md' "$MDSTAT")

# Get count of degraded arrays: a member-state map such as [U_] or [_U]
# contains at least one '_'. Restricting the bracket content to U and _
# avoids matching unrelated brackets.
RAID_STATUS=$(grep -c '\[[U_]*_[U_]*\]' "$MDSTAT")

# Is an array currently recovering, get percentage of recovery
# (several recovering arrays are joined on one line)
RAID_RECOVER=$(grep recovery "$MDSTAT" | awk '{print $4}' | paste -sd ' ' -)

# Check raid status
# RAID recovers --> Warning
if [[ $RAID_RECOVER ]]; then
  STATUS="WARNING - Checked $RAID_DEVICES arrays, recovering : $RAID_RECOVER"
  EXIT=1
# RAID ok
elif [[ $RAID_STATUS == "0" ]]; then
  STATUS="OK - Checked $RAID_DEVICES arrays."
  EXIT=0
# All else critical, better safe than sorry
else
  STATUS="CRITICAL - Checked $RAID_DEVICES arrays, $RAID_STATUS have FAILED"
  EXIT=2
fi

# Status and quit
echo "$STATUS"
exit $EXIT
diff --git a/modules/profile/files/monitoring/check_postgres_replication b/modules/profile/files/monitoring/check_postgres_replication
new file mode 100644
index 0000000..a550077
--- /dev/null
+++ b/modules/profile/files/monitoring/check_postgres_replication
@@ -0,0 +1,35 @@
#!/bin/bash
#
# Naemon plugin: reports the replay lag, in seconds, of the PostgreSQL
# replication connection(s) opened by a given user.
# OK below 5s, WARNING below 10s, CRITICAL otherwise.
#
# Usage: check_postgres_replication USER HOST PORT

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

user=$1
host=$2
port=$3

if [[ -z "$user" || -z "$host" || -z "$port" ]]; then
  echo "UNKNOWN - Usage: $0 user host port"
  exit $STATE_UNKNOWN
fi

# NOTE(review): $user is interpolated into the SQL text; it is expected to
# come from the monitoring configuration, not from untrusted input.
raw_lag=$(psql -h "$host" -p "$port" -A -t -c "SELECT COALESCE(EXTRACT(EPOCH FROM replay_lag),0) FROM pg_stat_replication WHERE usename='$user'" 2>/dev/null)
exit_code=$?

# Several standbys may stream as the same user, giving one line each: keep
# the worst lag, otherwise the integer conversion below breaks.
lag=$(printf '%s\n' "$raw_lag" | LC_ALL=C sort -n | tail -n 1)

if [[ $exit_code -ne 0 ]]; then
  echo "UNKNOWN - Impossible to run psql command"
  exit $STATE_UNKNOWN
elif [[ -z "$lag" ]]; then
  echo "UNKNOWN - No replication found for $user"
  exit $STATE_UNKNOWN
else
  output="Replication lag for $user is ${lag}s"
  # Round to whole seconds for the integer comparisons; the C locale makes
  # sure the '.' decimal separator is understood.
  lag=$(LC_ALL=C printf "%.0f" "$lag")

  if [[ $lag -lt 5 ]]; then
    echo "OK - $output"
    exit $STATE_OK
  elif [[ $lag -lt 10 ]]; then
    echo "WARNING - $output"
    exit $STATE_WARNING
  else
    echo "CRITICAL - $output"
    exit $STATE_CRITICAL
  fi
fi
diff --git a/modules/profile/manifests/fstab.pp b/modules/profile/manifests/fstab.pp
index 8ecfe72..3af316e 100644
--- a/modules/profile/manifests/fstab.pp
+++ b/modules/profile/manifests/fstab.pp
@@ -15,6 +15,12 @@ class profile::fstab (
15 device => "UUID=${infos[1]}", 15 device => "UUID=${infos[1]}",
16 fstype => $infos[2] 16 fstype => $infos[2]
17 } 17 }
18
19 @profile::monitoring::local_service { "Size on ${infos[0]} partition":
20 local => {
21 check_command => "check_local_disk!10%!5%!${infos[0]}",
22 };
23 }
18 } 24 }
19 } 25 }
20} 26}
diff --git a/modules/profile/manifests/monitoring.pp b/modules/profile/manifests/monitoring.pp
new file mode 100644
index 0000000..8633626
--- /dev/null
+++ b/modules/profile/manifests/monitoring.pp
@@ -0,0 +1,58 @@
# Sets up a naemon daemon: installs the packages, manages (and purges)
# /etc/naemon, renders resource.cfg and naemon.cfg from templates, ensures
# the object file exists, runs the service, then includes the host, service,
# command, time period and contact classes.
#
# @param naemon_url
#   When set and not empty, /etc/naemon/send_nrdp.sh is rendered from its template.
# @param naemon_token
#   Not referenced in this class body; presumably consumed by the templates
#   (to be confirmed against them).
class profile::monitoring (
  Optional[String] $naemon_url   = undef,
  Optional[String] $naemon_token = undef,
) inherits profile::monitoring::params {
  ensure_packages(['naemon', 'cnagios'])

  # Anything in /etc/naemon that Puppet does not manage is removed.
  file { '/etc/naemon':
    ensure  => 'directory',
    recurse => true,
    purge   => true,
    force   => true,
    require => Package['naemon'],
  }

  file { '/etc/naemon/resource.cfg':
    ensure  => 'file',
    owner   => 'naemon',
    group   => 'naemon',
    mode    => '0600',
    content => template('profile/monitoring/resource.cfg.erb'),
  }

  file { '/etc/naemon/naemon.cfg':
    ensure  => 'file',
    owner   => 'naemon',
    group   => 'naemon',
    mode    => '0644',
    content => template('profile/monitoring/naemon.cfg.erb'),
  }

  file { $objects:
    ensure => 'file',
    owner  => 'naemon',
    group  => 'naemon',
    mode   => '0600',
  }

  service { 'naemon':
    ensure => 'running',
    enable => true,
  }

  # Directory, then configuration files, then object file, then the service.
  File['/etc/naemon']
  -> File['/etc/naemon/resource.cfg']
  -> File['/etc/naemon/naemon.cfg']
  -> File[$objects]
  -> Service['naemon']

  if $naemon_url != undef and !empty($naemon_url) {
    file { '/etc/naemon/send_nrdp.sh':
      ensure  => 'file',
      owner   => 'naemon',
      group   => 'naemon',
      mode    => '0700',
      content => template('profile/monitoring/send_nrdp.sh.erb'),
    }
  }

  include 'profile::monitoring::hosts'
  include 'profile::monitoring::services'
  include 'profile::monitoring::commands'
  include 'profile::monitoring::times'
  include 'profile::monitoring::contacts'
}
diff --git a/modules/profile/manifests/monitoring/commands.pp b/modules/profile/manifests/monitoring/commands.pp
new file mode 100644
index 0000000..1c8d0b4
--- /dev/null
+++ b/modules/profile/manifests/monitoring/commands.pp
@@ -0,0 +1,70 @@
# Deploys the custom check plugins and declares the naemon command objects
# that use them or the stock monitoring-plugins.
class profile::monitoring::commands inherits profile::monitoring {
  ensure_packages(["monitoring-plugins"])

  file { $plugins:
    ensure => "directory",
    owner  => "root",
    group  => "naemon",
    mode   => "0755",
  }

  # Custom plugins shipped in this module's files/monitoring directory
  [
    "check_command",
    "check_md_raid",
    "check_postgres_replication",
    "check_last_file_date",
  ].each |$file| {
    file { "${plugins}/${file}":
      ensure => "present",
      owner  => "root",
      group  => "naemon",
      mode   => "0755",
      source => "puppet:///modules/profile/monitoring/${file}",
    }
  }

  # Defaults shared by every nagios_command declared in this class
  Nagios_command {
    ensure  => "present",
    owner   => "naemon",
    group   => "naemon",
    target  => $objects,
    notify  => Service["naemon"],
    before  => Service["naemon"],
    require => File["/etc/naemon"],
  }

  nagios_command {
    "check-host-alive":
      command_line => '$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5';
    "check_local_disk":
      command_line => '$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$';
    "check_local_procs":
      command_line => '$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$';
    "check_local_load":
      command_line => '$USER1$/check_load -w $ARG1$ -c $ARG2$';
    "check_local_swap":
      command_line => '$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$';
    "check_ntp":
      command_line => '$USER1$/check_ntp_time -H 0.arch.pool.ntp.org';
    "check_md_raid":
      command_line => '$USER2$/check_md_raid',
      require      => File["${plugins}/check_md_raid"];
    "check_command_output":
      command_line => '$USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$',
      require      => File["${plugins}/check_command"];
    "check_postgresql_replication":
      command_line => '/usr/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$"',
      require      => File["${plugins}/check_postgres_replication"];
    "check_last_file_date":
      command_line => '$USER2$/check_last_file_date "$ARG1$" "$ARG2$" "$ARG3$"',
      require      => File["${plugins}/check_last_file_date"],
  }

  # Same guard as the one protecting File["/etc/naemon/send_nrdp.sh"] in
  # profile::monitoring: the command requires that file, so both must be
  # declared under exactly the same condition (undef included).
  unless ($naemon_url == undef or empty($naemon_url)) {
    nagios_command {
      "notify-master":
        command_line => '/etc/naemon/send_nrdp.sh -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$"',
        require      => File["/etc/naemon/send_nrdp.sh"];
    }
  }
}
diff --git a/modules/profile/manifests/monitoring/contacts.pp b/modules/profile/manifests/monitoring/contacts.pp
new file mode 100644
index 0000000..a751153
--- /dev/null
+++ b/modules/profile/manifests/monitoring/contacts.pp
@@ -0,0 +1,16 @@
# Declares the naemon contact groups, written to the shared object file.
class profile::monitoring::contacts inherits profile::monitoring::params {
  # Defaults applied to every contact group declared below
  Nagios_contactgroup {
    ensure  => 'present',
    owner   => 'naemon',
    group   => 'naemon',
    target  => $objects,
    notify  => Service['naemon'],
    before  => Service['naemon'],
    require => File['/etc/naemon'],
  }

  nagios_contactgroup { 'admins':
    alias => 'System administrators',
  }
}
diff --git a/modules/profile/manifests/monitoring/external_service.pp b/modules/profile/manifests/monitoring/external_service.pp
new file mode 100644
index 0000000..027dad8
--- /dev/null
+++ b/modules/profile/manifests/monitoring/external_service.pp
@@ -0,0 +1,16 @@
# Declares a service, named after the resource title, in the file holding
# the services meant for the master.
#
# @param type
#   Key of profile::monitoring::params::service_types selecting the template
#   given to `use`.
# @param master
#   Extra nagios_service attributes, splatted into the declaration.
define profile::monitoring::external_service (
  Optional[String] $type   = undef,
  Optional[Hash]   $master = {},
) {
  include profile::monitoring::params

  $service_description = $title
  $service_host        = $::profile::monitoring::params::service_local['host_name']
  $service_template    = $::profile::monitoring::params::service_types[$type]

  nagios_service { $service_description:
    service_description => $service_description,
    host_name           => $service_host,
    use                 => $service_template,
    target              => $::profile::monitoring::params::services_for_master,
    *                   => $master,
  }
}
diff --git a/modules/profile/manifests/monitoring/hosts.pp b/modules/profile/manifests/monitoring/hosts.pp
new file mode 100644
index 0000000..f7802be
--- /dev/null
+++ b/modules/profile/manifests/monitoring/hosts.pp
@@ -0,0 +1,45 @@
# Declares the local machine as a naemon host and as the only member of the
# "linux-servers" host group.
class profile::monitoring::hosts inherits profile::monitoring::params {
  $real_hostname = lookup('base_installation::real_hostname')

  # Defaults for host groups
  Nagios_hostgroup {
    ensure  => 'present',
    owner   => 'naemon',
    group   => 'naemon',
    target  => $objects,
    notify  => Service['naemon'],
    before  => Service['naemon'],
    require => File['/etc/naemon'],
  }

  # Defaults for hosts
  Nagios_host {
    ensure  => 'present',
    owner   => 'naemon',
    group   => 'naemon',
    target  => $objects,
    notify  => Service['naemon'],
    before  => Service['naemon'],
    require => File['/etc/naemon'],
  }

  nagios_hostgroup { 'linux-servers':
    alias   => 'Linux Servers',
    members => [$real_hostname],
  }

  # Check and notification settings applied to the host through the
  # default body below
  $host_linux_server = {
    check_command         => 'check-host-alive',
    check_interval        => 5,
    check_period          => '24x7',
    contact_groups        => 'admins',
    max_check_attempts    => '10',
    notification_interval => '120',
    notification_options  => 'd,u,r',
    retry_interval        => '1',
  }

  nagios_host {
    $real_hostname:
      address => $real_hostname;
    default:
      * => $host_linux_server,
  }
}
diff --git a/modules/profile/manifests/monitoring/local_service.pp b/modules/profile/manifests/monitoring/local_service.pp
new file mode 100644
index 0000000..1f975f0
--- /dev/null
+++ b/modules/profile/manifests/monitoring/local_service.pp
@@ -0,0 +1,56 @@
# Declares one locally checked service twice: once in the local object file
# and once, titled "master - <title>", in the file of services meant for the
# master, where it uses the passive service template.
#
# @param sudos
#   Hash of sudo::conf name => content, ensured before the local service.
# @param common
#   nagios_service attributes applied to both declarations.
# @param master
#   Attributes applied only to the declaration for the master.
# @param local
#   Attributes applied only to the local declaration.
define profile::monitoring::local_service (
  Optional[Hash] $sudos  = {},
  Optional[Hash] $common = {},
  Optional[Hash] $master = {},
  Optional[Hash] $local  = {},
) {
  include profile::monitoring::params

  $service_description = $title

  Nagios_service {
    ensure  => 'present',
    owner   => 'naemon',
    group   => 'naemon',
    notify  => Service['naemon'],
    before  => Service['naemon'],
    require => File['/etc/naemon'],
  }

  $sudos.each |$sudo_name, $sudo_content| {
    ensure_resource('sudo::conf', $sudo_name, {
      content => $sudo_content,
      before  => Nagios_service[$service_description],
    })
  }

  # First pass: declaration for the master; second pass: local declaration.
  [true, false].each |$for_master| {
    if $for_master {
      $defaults = {
        mode           => '0644',
        target         => $::profile::monitoring::params::services_for_master,
        check_interval => $::profile::monitoring::params::service_local['check_interval'],
        retry_interval => $::profile::monitoring::params::service_local['retry_interval'],
        host_name      => $::profile::monitoring::params::service_local['host_name'],
        use            => $::profile::monitoring::params::service_types['passive'],
        notify         => [],
      }
      $prefix    = 'master - '
      $overrides = $master
    } else {
      $defaults = merge(
        $::profile::monitoring::params::service_local,
        { target => $::profile::monitoring::params::objects }
      )
      $prefix    = ''
      $overrides = $local
    }

    # Later hashes win: defaults < common < master/local specific attributes
    $attributes = merge($defaults, $common, $overrides)

    nagios_service { "${prefix}${service_description}":
      service_description => $service_description,
      *                   => $attributes,
    }
  }
}
diff --git a/modules/profile/manifests/monitoring/params.pp b/modules/profile/manifests/monitoring/params.pp
new file mode 100644
index 0000000..27f895e
--- /dev/null
+++ b/modules/profile/manifests/monitoring/params.pp
@@ -0,0 +1,42 @@
# Shared values of the monitoring profile: file locations, service template
# names and default service attributes.
class profile::monitoring::params {
  $real_hostname = lookup('base_installation::real_hostname')

  # File locations
  $services_for_master = '/etc/naemon/services_for_master.cfg'
  $objects             = '/etc/naemon/objects.cfg'
  $plugins             = '/etc/naemon/monitoring-plugins'

  # Service type => name of the service template to `use`
  $service_types = {
    'passive' => 'external-passive-service',
    'web'     => 'external-web-service',
  }

  # Attributes common to every service
  $service_generic = {
    active_checks_enabled        => '1',
    check_freshness              => '0',
    check_interval               => '10',
    check_period                 => '24x7',
    contact_groups               => 'admins',
    event_handler_enabled        => '1',
    flap_detection_enabled       => '1',
    is_volatile                  => '0',
    max_check_attempts           => '3',
    notification_interval        => '60',
    notification_options         => 'w,u,c,r',
    notification_period          => '24x7',
    notifications_enabled        => '0',
    obsess_over_service          => '1',
    passive_checks_enabled       => '1',
    process_perf_data            => '1',
    retain_nonstatus_information => '1',
    retain_status_information    => '1',
    retry_interval               => '2',
  }

  # Generic attributes overridden for services checked on this host
  $service_local_overrides = {
    host_name          => $real_hostname,
    check_interval     => '5',
    max_check_attempts => '4',
    retry_interval     => '1',
  }
  $service_local = merge($service_generic, $service_local_overrides)
}
diff --git a/modules/profile/manifests/monitoring/services.pp b/modules/profile/manifests/monitoring/services.pp
new file mode 100644
index 0000000..95c6efb
--- /dev/null
+++ b/modules/profile/manifests/monitoring/services.pp
@@ -0,0 +1,42 @@
# Declares the base local checks (disk, processes, load, swap, fail2ban, NTP,
# md RAID) and realizes every virtual local and external service declared
# elsewhere in the catalog.
class profile::monitoring::services {
  # The RAID check is only kept when find_file locates /proc/mdstat
  $md_raid_ensure = (find_file('/proc/mdstat') == undef) ? {
    true    => 'absent',
    default => 'present',
  }

  profile::monitoring::local_service {
    'Size on root partition':
      local => {
        check_command => 'check_local_disk!20%!10%!/',
      };
    'Total number of process':
      local => {
        check_command => 'check_local_procs!50!100!RSZDT',
      };
    'Average load':
      local => {
        check_command => 'check_local_load!8.0,8.0,8.0!10.0,10.0,10.0',
      };
    'Swap usage':
      local => {
        check_command => 'check_local_swap!20!10',
      };
    'fail2ban is active':
      sudos => {
        'naemon-fail2ban' => 'naemon ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping',
      },
      local => {
        check_command => 'check_command_output!fail2ban-client ping!pong!-r root',
      };
    'NTP is activated and working':
      local => {
        check_command => 'check_ntp',
      };
    'No mdadm array is degraded':
      common => {
        ensure => $md_raid_ensure,
      },
      local  => {
        check_command => 'check_md_raid',
      };
  }

  # Realize the virtual services declared with @ in other classes and defines
  Profile::Monitoring::Local_service <| |>
  Profile::Monitoring::External_service <| |>
}
diff --git a/modules/profile/manifests/monitoring/times.pp b/modules/profile/manifests/monitoring/times.pp
new file mode 100644
index 0000000..42f5d9c
--- /dev/null
+++ b/modules/profile/manifests/monitoring/times.pp
@@ -0,0 +1,23 @@
# Declares the naemon time periods, written to the shared object file.
class profile::monitoring::times inherits profile::monitoring::params {
  # Defaults applied to every time period declared below
  Nagios_timeperiod {
    ensure  => 'present',
    owner   => 'naemon',
    group   => 'naemon',
    target  => $objects,
    notify  => Service['naemon'],
    before  => Service['naemon'],
    require => File['/etc/naemon'],
  }

  $whole_day = '00:00-24:00'

  nagios_timeperiod { '24x7':
    alias     => '24 Hours A Day, 7 Days A Week',
    monday    => $whole_day,
    tuesday   => $whole_day,
    wednesday => $whole_day,
    thursday  => $whole_day,
    friday    => $whole_day,
    saturday  => $whole_day,
    sunday    => $whole_day,
  }
}
diff --git a/modules/profile/manifests/postgresql.pp b/modules/profile/manifests/postgresql.pp
index 97ce572..fedbcb1 100644
--- a/modules/profile/manifests/postgresql.pp
+++ b/modules/profile/manifests/postgresql.pp
@@ -28,5 +28,13 @@ class profile::postgresql (
28 28
29 profile::postgresql::base_pg_hba_rules { "default": } 29 profile::postgresql::base_pg_hba_rules { "default": }
30 30
31 @profile::monitoring::local_service { "Databases are present in postgresql":
32 sudos => {
33 "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\\ nspname\\ from\\ pg_catalog.pg_namespace"
34 },
35 local => {
36 check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace'!public!-r postgres",
37 }
38 }
31} 39}
32 40
diff --git a/modules/profile/manifests/postgresql/backup_dump.pp b/modules/profile/manifests/postgresql/backup_dump.pp
index 53fb20e..e247cf0 100644
--- a/modules/profile/manifests/postgresql/backup_dump.pp
+++ b/modules/profile/manifests/postgresql/backup_dump.pp
@@ -57,4 +57,13 @@ define profile::postgresql::backup_dump (
57 }, 57 },
58 ] 58 ]
59 } 59 }
60
61 @profile::monitoring::local_service { "Last postgresql dump in $pg_backup_path is not too old":
62 sudos => {
63 "naemon-postgresql-dumps-$pg_host" => "naemon ALL=($pg_user) NOPASSWD: /usr/bin/find $pg_backup_path -mindepth 1 -maxdepth 1 -printf %T@?n",
64 },
65 local => {
66 check_command => "check_last_file_date!$pg_backup_path!7!$pg_user",
67 }
68 }
60} 69}
diff --git a/modules/profile/manifests/postgresql/backup_pgbouncer.pp b/modules/profile/manifests/postgresql/backup_pgbouncer.pp
index 45b8ed5..5fd7861 100644
--- a/modules/profile/manifests/postgresql/backup_pgbouncer.pp
+++ b/modules/profile/manifests/postgresql/backup_pgbouncer.pp
@@ -48,6 +48,16 @@ define profile::postgresql::backup_pgbouncer (
48 content => "${pg_infos[pgbouncer_dbname]} = host=$pg_path$pg_port user=${pg_infos[dbuser]} dbname=${pg_infos[dbname]}", 48 content => "${pg_infos[pgbouncer_dbname]} = host=$pg_path$pg_port user=${pg_infos[dbuser]} dbname=${pg_infos[dbname]}",
49 } 49 }
50 50
51 # Current pam configuration requires password for postgres
52 # @profile::monitoring::local_service { "Database ${pg_infos[pgbouncer_dbname]} is available in pgbouncer":
53 # sudos => {
54 # "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace ${pg_infos[pgbouncer_dbname]}"
55 # },
56 # local => {
57 # check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace' ${pg_infos[pgbouncer_dbname]}!public!-r postgres",
58 # }
59 # }
60
51 # pg_hba for accessed cluster 61 # pg_hba for accessed cluster
52 postgresql::server::pg_hba_rule { "$pg_backup_host - local access as ${pg_infos[dbuser]} user": 62 postgresql::server::pg_hba_rule { "$pg_backup_host - local access as ${pg_infos[dbuser]} user":
53 description => "Allow local access to ${pg_infos[dbuser]} user", 63 description => "Allow local access to ${pg_infos[dbuser]} user",
diff --git a/modules/profile/manifests/postgresql/master.pp b/modules/profile/manifests/postgresql/master.pp
index 02315a6..99ac4c4 100644
--- a/modules/profile/manifests/postgresql/master.pp
+++ b/modules/profile/manifests/postgresql/master.pp
@@ -59,5 +59,15 @@ define profile::postgresql::master (
59 handle_slot => true, 59 handle_slot => true,
60 add_self_role => true, 60 add_self_role => true,
61 } 61 }
62
63 @profile::monitoring::local_service { "Postgresql replication for $backup_host is up to date":
64 sudos => {
65 "naemon-postgresql-replication-$backup_host" => "naemon ALL=(postgres) NOPASSWD: /etc/naemon/monitoring-plugins/check_postgres_replication $backup_host /run/postgresql 5432"
66
67 },
68 local => {
69 check_command => "check_postgresql_replication!$backup_host!/run/postgresql!5432",
70 }
71 }
62 } 72 }
63} 73}
diff --git a/modules/profile/manifests/postgresql/ssl.pp b/modules/profile/manifests/postgresql/ssl.pp
index dc56c0b..b809a9d 100644
--- a/modules/profile/manifests/postgresql/ssl.pp
+++ b/modules/profile/manifests/postgresql/ssl.pp
@@ -78,5 +78,4 @@ define profile::postgresql::ssl (
78 content => "ssl = on\nssl_key_file = '$ssl_key'\nssl_cert_file = '$ssl_cert'\n" 78 content => "ssl = on\nssl_key_file = '$ssl_key'\nssl_cert_file = '$ssl_cert'\n"
79 } 79 }
80 } 80 }
81
82} 81}
diff --git a/modules/profile/templates/monitoring/naemon.cfg.erb b/modules/profile/templates/monitoring/naemon.cfg.erb
new file mode 100644
index 0000000..bacbe04
--- /dev/null
+++ b/modules/profile/templates/monitoring/naemon.cfg.erb
@@ -0,0 +1,1038 @@
1##############################################################################
2#
3# naemon.cfg - Sample Main Config File for Naemon 1.0.7
4#
5# Read the documentation for more information on this configuration
6# file. I've provided some comments here, but things may not be so
7# clear without further explanation.
8#
9#
10##############################################################################
11
12
13# LOG FILE
14# This is the main log file where service and host events are logged
15# for historical purposes. This should be the first option specified
16# in the config file!!!
17
18log_file=/var/log/naemon/naemon.log
19
20
21
22# OBJECT CONFIGURATION FILE(S)
23# These are the object configuration files in which you define hosts,
24# host groups, contacts, contact groups, services, etc.
25# You can split your object definitions across several config files
26# if you wish (as shown below), or keep them all in a single config file.
27
28# You can specify individual object config files as shown below:
29cfg_file=<%= @objects %>
30#cfg_file=/etc/naemon/objects/commands.cfg
31#cfg_file=/etc/naemon/objects/contacts.cfg
32#cfg_file=/etc/naemon/objects/timeperiods.cfg
33#cfg_file=/etc/naemon/objects/templates.cfg
34
35
36# You can also tell naemon to process all config files (with a .cfg
37# extension) in a particular directory by using the cfg_dir
38# directive as shown below:
39#cfg_dir=/etc/naemon/conf.d
40
41
42
43
44# OBJECT CACHE FILE
45# This option determines where object definitions are cached when
46# naemon starts/restarts. The CGIs read object definitions from
47# this cache file (rather than looking at the object config files
48# directly) in order to prevent inconsistencies that can occur
49# when the config files are modified after naemon starts.
50
51object_cache_file=/var/lib/naemon/objects.cache
52
53
54
55# PRE-CACHED OBJECT FILE
56# This option determines the location of the precached object file.
57# If you run naemon with the -p command line option, it will preprocess
58# your object configuration file(s) and write the cached config to this
59# file. You can then start naemon with the -u option to have it read
60# object definitions from this precached file, rather than the standard
61# object configuration files (see the cfg_file and cfg_dir options above).
62# Using a precached object file can speed up the time needed to (re)start
63# the naemon process if you've got a large and/or complex configuration.
64# Read the documentation section on optimizing naemon to find out more
65# about how this feature works.
66
67precached_object_file=/var/lib/naemon/objects.precache
68
69
70
71# RESOURCE FILE
72# This is an optional resource file that contains $USERx$ macro
73# definitions. Multiple resource files can be specified by using
74# multiple resource_file definitions. The CGIs will not attempt to
75# read the contents of resource files, so information that is
76# considered to be sensitive (usernames, passwords, etc) can be
77# defined as macros in this file and restrictive permissions (600)
78# can be placed on this file.
79
80resource_file=/etc/naemon/resource.cfg
81
82
83
84# STATUS FILE
85# This is where the current status of all monitored services and
86# hosts is stored. Its contents are read and processed by the CGIs.
87# The contents of the status file are deleted every time naemon
88# restarts.
89
90status_file=/var/lib/naemon/status.dat
91
92
93
94# STATUS FILE UPDATE INTERVAL
95# This option determines the frequency (in seconds) that
96# naemon will periodically dump program, host, and
97# service status data. Set it to 0 to disable updates.
98
99status_update_interval=10
100
101
102
103# EXTERNAL COMMAND OPTION
104# This option allows you to specify whether or not Naemon should check
105# for external commands (in the command file defined below). By default
106# Naemon will *not* check for external commands, just to be on the
107# cautious side. If you want to be able to use the CGI command interface
108# you will have to enable this.
109# Values: 0 = disable commands, 1 = enable commands
110
111check_external_commands=1
112
113
114
115# EXTERNAL COMMAND FILE
116# This is the file that Naemon checks for external command requests.
117# It is also where the command CGI will write commands that are submitted
118# by users, so it must be writeable by the user that the web server
119# is running as (usually 'nobody'). Permissions should be set at the
120# directory level instead of on the file, as the file is deleted every
121# time its contents are processed.
122
123command_file=/var/lib/naemon/naemon.cmd
124
125
126
127# QUERY HANDLER INTERFACE
128# This is the socket that is created for the Query Handler interface
129
130#query_socket=/var/lib/naemon/naemon.qh
131
132
133
134# LOCK FILE
135# This is the lockfile that Naemon will use to store its PID number
136# in when it is running in daemon mode.
137
138lock_file=/run/naemon/naemon.pid
139
140
141
142# TEMP FILE
143# This is a temporary file that is used as scratch space when Naemon
144# updates the status log, cleans the comment file, etc. This file
145# is created, used, and deleted throughout the time that Naemon is
146# running.
147
148temp_file=/var/lib/naemon/naemon.tmp
149
150
151
152# TEMP PATH
153# This is the path where Naemon can create temp files for service and
154# host check results, etc.
155
156temp_path=/var/cache/naemon
157
158
159
160# EVENT BROKER OPTIONS
161# Controls what (if any) data gets sent to the event broker.
162# Values: 0 = Broker nothing
163# -1 = Broker everything
164# <other> = See documentation
165
166event_broker_options=-1
167
168
169
170# EVENT BROKER MODULE(S)
171# This directive is used to specify an event broker module that should
172# be loaded by Naemon at startup. Use multiple directives if you want
173# to load more than one module. Arguments that should be passed to
174# the module at startup are separated from the module path by a space.
175#
176# Example:
177#
178# broker_module=<modulepath> [moduleargs]
179
180#broker_module=/usr/lib/naemon/naemon-livestatus/livestatus.so /var/cache/naemon/live
181#broker_module=/somewhere/module1.o
182#broker_module=/somewhere/module2.o arg1 arg2=3 debug=0
183
184# In order to provide drop-in support for new modules, you can also make use of
185# the include_dir directive. The include_dir directive causes Naemon to parse
186# any configuration (not just object configuration, as with cfg_dir) as if the
187# contents of the files in the pointed-to directory was included on this line.
188# The path to the directory is relative to the path of the main naemon.cfg
189# file.
190# include_dir=module-conf.d
191
192# LOG ARCHIVE PATH
193# This is the directory where archived (rotated) log files are placed by the
194# logrotate daemon. It is used by out of core add-ons to discover the logfiles.
195
196log_archive_path=/var/log/naemon/archives
197
198
199
200# LOGGING OPTIONS
201# If you want messages logged to the syslog facility, as well as the
202# Naemon log file set this option to 1. If not, set it to 0.
203
204use_syslog=1
205
206# NOTIFICATION LOGGING OPTION
207# If you don't want notifications to be logged, set this value to 0.
208# If notifications should be logged, set the value to 1.
209
210log_notifications=1
211
212# Notification suppression reason (NSR) logging causes the reason for a
213# notification suppression to be logged, when it occurs. This can potentially
214# add some noise to your log file, but is highly useful when troubleshooting
215# missing notifications.
216
217enable_notification_suppression_reason_logging=1
218
219
220# SERVICE RETRY LOGGING OPTION
221# If you don't want service check retries to be logged, set this value
222# to 0. If retries should be logged, set the value to 1.
223
224log_service_retries=1
225
226
227
228# HOST RETRY LOGGING OPTION
229# If you don't want host check retries to be logged, set this value to
230# 0. If retries should be logged, set the value to 1.
231
232log_host_retries=1
233
234
235
236# EVENT HANDLER LOGGING OPTION
237# If you don't want host and service event handlers to be logged, set
238# this value to 0. If event handlers should be logged, set the value
239# to 1.
240
241log_event_handlers=1
242
243
244
245# INITIAL STATES LOGGING OPTION
246# If you want Naemon to log all initial host and service states to
247# the main log file (the first time the service or host is checked)
248# you can enable this option by setting this value to 1. If you
249# are not using an external application that does long term state
250# statistics reporting, you do not need to enable this option. In
251# this case, set the value to 0.
252
253log_initial_states=1
254
255
256
257# CURRENT STATES LOGGING OPTION
258# If you don't want Naemon to log all current host and service states
259# after log has been rotated to the main log file, you can disable this
260# option by setting this value to 0. Default value is 1.
261
262log_current_states=1
263
264
265
266# EXTERNAL COMMANDS LOGGING OPTION
267# If you don't want Naemon to log external commands, set this value
268# to 0. If external commands should be logged, set this value to 1.
269# Note: This option does not include logging of passive service
270# checks - see the option below for controlling whether or not
271# passive checks are logged.
272
273log_external_commands=1
274
275
276
277# PASSIVE CHECKS LOGGING OPTION
278# If you don't want Naemon to log passive host and service checks, set
279# this value to 0. If passive checks should be logged, set
280# this value to 1.
281
282log_passive_checks=1
283
284
285
286# GLOBAL HOST AND SERVICE EVENT HANDLERS
287# These options allow you to specify a host and service event handler
288# command that is to be run for every host or service state change.
289# The global event handler is executed immediately prior to the event
290# handler that you have optionally specified in each host or
291# service definition. The command argument is the short name of a
292# command definition that you define in your host configuration file.
293# Read the HTML docs for more information.
294
295#global_host_event_handler=somecommand
296#global_service_event_handler=somecommand
297
298
299
300# MAXIMUM CONCURRENT SERVICE CHECKS
301# This option allows you to specify the maximum number of
302# service checks that can be run in parallel at any given time.
303# Specifying a value of 1 for this variable essentially prevents
304# any service checks from being parallelized. A value of 0
305# will not restrict the number of concurrent checks that are
306# being executed.
307
308max_concurrent_checks=0
309
310
311# CHECK RESULT PATH
312# This is the directory where Naemon reads check results of host and
313# service checks to further process them.
314#
315# Note: Naemon does not require this folder internally but it still
316# can be used to pass check results to Naemon.
317
318check_result_path=/var/cache/naemon/checkresults
319
320
321# CACHED HOST CHECK HORIZON
322# This option determines the maximum amount of time (in seconds)
323# that the state of a previous host check is considered current.
324# Cached host states (from host checks that were performed more
325# recently than the timeframe specified by this value) can immensely
326# improve performance in regards to the host check logic.
327# Too high of a value for this option may result in inaccurate host
328# states being used by Naemon, while a lower value may result in a
329# performance hit for host checks. Use a value of 0 to disable host
330# check caching.
331
332cached_host_check_horizon=15
333
334
335
336# CACHED SERVICE CHECK HORIZON
337# This option determines the maximum amount of time (in seconds)
338# that the state of a previous service check is considered current.
339# Cached service states (from service checks that were performed more
340# recently than the timeframe specified by this value) can immensely
341# improve performance in regards to predictive dependency checks.
342# Use a value of 0 to disable service check caching.
343
344cached_service_check_horizon=15
345
346
347
348# ENABLE PREDICTIVE HOST DEPENDENCY CHECKS
349# This option determines whether or not Naemon will attempt to execute
350# checks of hosts when it predicts that future dependency logic test
351# may be needed. These predictive checks can help ensure that your
352# host dependency logic works well.
353# Values:
354# 0 = Disable predictive checks
355# 1 = Enable predictive checks (default)
356
357enable_predictive_host_dependency_checks=1
358
359
360
361# ENABLE PREDICTIVE SERVICE DEPENDENCY CHECKS
362# This option determines whether or not Naemon will attempt to execute
363# checks of service when it predicts that future dependency logic test
364# may be needed. These predictive checks can help ensure that your
365# service dependency logic works well.
366# Values:
367# 0 = Disable predictive checks
368# 1 = Enable predictive checks (default)
369
370enable_predictive_service_dependency_checks=1
371
372
373
374# SOFT STATE DEPENDENCIES
375# This option determines whether or not Naemon will use soft state
376# information when checking host and service dependencies. Normally
377# Naemon will only use the latest hard host or service state when
378# checking dependencies. If you want it to use the latest state (regardless
379# of whether it's a soft or hard state type), enable this option.
380# Values:
381# 0 = Don't use soft state dependencies (default)
382# 1 = Use soft state dependencies
383
384soft_state_dependencies=0
385
386
387
388# TIME CHANGE ADJUSTMENT THRESHOLDS
389# These options determine when Naemon will react to detected changes
390# in system time (either forward or backwards).
391
392#time_change_threshold=900
393
394
395
396# TIMEOUT VALUES
397# These options control how much time Naemon will allow various
398# types of commands to execute before killing them off. Options
399# are available for controlling maximum time allotted for
400# service checks, host checks, event handlers, notifications, the
401# ocsp command, and performance data commands. All values are in
402# seconds.
403
404service_check_timeout=75
405host_check_timeout=30
406event_handler_timeout=30
407notification_timeout=30
408ocsp_timeout=5
409perfdata_timeout=5
410
411
412
413# RETAIN STATE INFORMATION
414# This setting determines whether or not Naemon will save state
415# information for services and hosts before it shuts down. Upon
416# startup Naemon will reload all saved service and host state
417# information before starting to monitor. This is useful for
418# maintaining long-term data on state statistics, etc, but will
419# slow Naemon down a bit when it (re)starts. Since it's only
420# a one-time penalty, I think it's well worth the additional
421# startup delay.
422
423retain_state_information=1
424
425
426
427# STATE RETENTION FILE
428# This is the file that Naemon should use to store host and
429# service state information before it shuts down. The state
430# information in this file is also read immediately prior to
431# starting to monitor the network when Naemon is restarted.
432# This file is used only if the retain_state_information
433# variable is set to 1.
434
435state_retention_file=/var/lib/naemon/retention.dat
436
437
438
439# RETENTION DATA UPDATE INTERVAL
440# This setting determines how often (in minutes) that Naemon
441# will automatically save retention data during normal operation.
442# If you set this value to 0, Naemon will not save retention
443# data at regular interval, but it will still save retention
444# data before shutting down or restarting. If you have disabled
445# state retention, this option has no effect.
446
447retention_update_interval=60
448
449
450
451# USE RETAINED PROGRAM STATE
452# This setting determines whether or not Naemon will set
453# program status variables based on the values saved in the
454# retention file. If you want to use retained program status
455# information, set this value to 1. If not, set this value
456# to 0.
457
458use_retained_program_state=1
459
460
461
462# USE RETAINED SCHEDULING INFO
463# This setting determines whether or not Naemon will retain
464# the scheduling info (next check time) for hosts and services
465# based on the values saved in the retention file.
466# If you want to use retained scheduling info, set this
467# value to 1. If not, set this value to 0.
468
469use_retained_scheduling_info=1
470
471
472
473# RETAINED ATTRIBUTE MASKS (ADVANCED FEATURE)
474# The following variables are used to specify specific host and
475# service attributes that should *not* be retained by Naemon during
476# program restarts.
477#
478# The values of the masks are bitwise ORs of values specified
479# by the "MODATTR_" definitions found in include/common.h.
480# For example, if you do not want the current enabled/disabled state
481# of flap detection and event handlers for hosts to be retained, you
482# would use a value of 24 for the host attribute mask...
483# MODATTR_EVENT_HANDLER_ENABLED (8) + MODATTR_FLAP_DETECTION_ENABLED (16) = 24
484
485# This mask determines what host attributes are not retained
486retained_host_attribute_mask=0
487
488# This mask determines what service attributes are not retained
489retained_service_attribute_mask=0
490
491# These two masks determine what process attributes are not retained.
492# There are two masks, because some process attributes have host and service
493# options. For example, you can disable active host checks, but leave active
494# service checks enabled.
495retained_process_host_attribute_mask=0
496retained_process_service_attribute_mask=0
497
498# These two masks determine what contact attributes are not retained.
499# There are two masks, because some contact attributes have host and
500# service options. For example, you can disable host notifications for
501# a contact, but leave service notifications enabled for them.
502retained_contact_host_attribute_mask=0
503retained_contact_service_attribute_mask=0
504
505
506
507# INTERVAL LENGTH
508# This is the seconds per unit interval as used in the
509# host/contact/service configuration files. Setting this to 60 means
510# that each interval is one minute long (60 seconds). Other settings
511# have not been tested much, so your mileage is likely to vary...
512
513interval_length=60
514
515
516
517# AGGRESSIVE HOST CHECKING OPTION
518# If you don't want to turn on aggressive host checking features, set
519# this value to 0 (the default). Otherwise set this value to 1 to
520# enable the aggressive check option. Read the docs for more info
521# on what aggressive host check is or check out the source code in
522# base/checks.c
523
524use_aggressive_host_checking=0
525
526
527
528# SERVICE CHECK EXECUTION OPTION
529# This determines whether or not Naemon will actively execute
530# service checks when it initially starts. If this option is
531# disabled, checks are not actively made, but Naemon can still
532# receive and process passive check results that come in. Unless
533# you're implementing redundant hosts or have a special need for
534# disabling the execution of service checks, leave this enabled!
535# Values: 1 = enable checks, 0 = disable checks
536
537execute_service_checks=1
538
539
540
541# PASSIVE SERVICE CHECK ACCEPTANCE OPTION
542# This determines whether or not Naemon will accept passive
543# service checks results when it initially (re)starts.
544# Values: 1 = accept passive checks, 0 = reject passive checks
545
546accept_passive_service_checks=1
547
548
549
550# HOST CHECK EXECUTION OPTION
551# This determines whether or not Naemon will actively execute
552# host checks when it initially starts. If this option is
553# disabled, checks are not actively made, but Naemon can still
554# receive and process passive check results that come in. Unless
555# you're implementing redundant hosts or have a special need for
556# disabling the execution of host checks, leave this enabled!
557# Values: 1 = enable checks, 0 = disable checks
558
559execute_host_checks=1
560
561
562
563# PASSIVE HOST CHECK ACCEPTANCE OPTION
564# This determines whether or not Naemon will accept passive
565# host checks results when it initially (re)starts.
566# Values: 1 = accept passive checks, 0 = reject passive checks
567
568accept_passive_host_checks=1
569
570
571
572# NOTIFICATIONS OPTION
573# This determines whether or not Naemon will send out any host or
574# service notifications when it is initially (re)started.
575# Values: 1 = enable notifications, 0 = disable notifications
576
577enable_notifications=1
578
579
580
581# EVENT HANDLER USE OPTION
582# This determines whether or not Naemon will run any host or
583# service event handlers when it is initially (re)started. Unless
584# you're implementing redundant hosts, leave this option enabled.
585# Values: 1 = enable event handlers, 0 = disable event handlers
586
587enable_event_handlers=1
588
589
590
591# PROCESS PERFORMANCE DATA OPTION
592# This determines whether or not Naemon will process performance
593# data returned from service and host checks. If this option is
594# enabled, host performance data will be processed using the
595# host_perfdata_command (defined below) and service performance
596# data will be processed using the service_perfdata_command (also
597# defined below). Read the HTML docs for more information on
598# performance data.
599# Values: 1 = process performance data, 0 = do not process performance data
600
601process_performance_data=0
602
603
604
605# HOST AND SERVICE PERFORMANCE DATA PROCESSING COMMANDS
606# These commands are run after every host and service check is
607# performed. These commands are executed only if the
608# process_performance_data option (above) is set to 1. The command
609# argument is the short name of a command definition that you
610# define in your host configuration file. Read the HTML docs for
611# more information on performance data.
612
613#host_perfdata_command=process-host-perfdata
614#service_perfdata_command=process-service-perfdata
615
616
617
618# HOST AND SERVICE PERFORMANCE DATA FILES
619# These files are used to store host and service performance data.
620# Performance data is only written to these files if the
621# process_performance_data option (above) is set to 1.
622
623#host_perfdata_file=/var/lib/naemon/host-perfdata
624#service_perfdata_file=/var/lib/naemon/service-perfdata
625
626
627
628# HOST AND SERVICE PERFORMANCE DATA FILE TEMPLATES
629# These options determine what data is written (and how) to the
630# performance data files. The templates may contain macros, special
631# characters (\t for tab, \r for carriage return, \n for newline)
632# and plain text. A newline is automatically added after each write
633# to the performance data file. Some examples of what you can do are
634# shown below.
635
636#host_perfdata_file_template=[HOSTPERFDATA]\t$TIMET$\t$HOSTNAME$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$
637#service_perfdata_file_template=[SERVICEPERFDATA]\t$TIMET$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$
638
639
640
641# HOST AND SERVICE PERFORMANCE DATA FILE MODES
642# This option determines whether or not the host and service
643# performance data files are opened in write ("w") or append ("a")
644# mode. If you want to use named pipes, you should use the special
645# pipe ("p") mode which avoids blocking at startup, otherwise you will
646# likely want the default append ("a") mode.
647
648#host_perfdata_file_mode=a
649#service_perfdata_file_mode=a
650
651
652
653# HOST AND SERVICE PERFORMANCE DATA FILE PROCESSING INTERVAL
654# These options determine how often (in seconds) the host and service
655# performance data files are processed using the commands defined
656# below. A value of 0 indicates the files should not be periodically
657# processed.
658
659#host_perfdata_file_processing_interval=0
660#service_perfdata_file_processing_interval=0
661
662
663
664# HOST AND SERVICE PERFORMANCE DATA FILE PROCESSING COMMANDS
665# These commands are used to periodically process the host and
666# service performance data files. The interval at which the
667# processing occurs is determined by the options above.
668
669#host_perfdata_file_processing_command=process-host-perfdata-file
670#service_perfdata_file_processing_command=process-service-perfdata-file
671
672
673
674# HOST AND SERVICE PERFORMANCE DATA PROCESS EMPTY RESULTS
675# These options determine whether the core will process empty perfdata
676# results or not. This is needed for distributed monitoring, and intentionally
677# turned on by default.
678# If you don't require empty perfdata - saving some cpu cycles
679# on unwanted macro calculation - you can turn that off. Be careful!
680# Values: 1 = enable, 0 = disable
681
682#host_perfdata_process_empty_results=1
683#service_perfdata_process_empty_results=1
684
685
686# OBSESS OVER SERVICE CHECKS OPTION
687# This determines whether or not Naemon will obsess over service
688# checks and run the ocsp_command defined below. Unless you're
689# planning on implementing distributed monitoring, do not enable
690# this option. Read the HTML docs for more information on
691# implementing distributed monitoring.
692# Values: 1 = obsess over services, 0 = do not obsess (default)
693
694obsess_over_services=<%= @naemon_url.nil? ? 0 : 1 %>
695
696
697
698# OBSESSIVE COMPULSIVE SERVICE PROCESSOR COMMAND
699# This is the command that is run for every service check that is
700# processed by Naemon. This command is executed only if the
701# obsess_over_services option (above) is set to 1. The command
702# argument is the short name of a command definition that you
703# define in your host configuration file. Read the HTML docs for
704# more information on implementing distributed monitoring.
705
706<% if !@naemon_url.nil? %>
707ocsp_command=notify-master
708<% end %>
709
710
711
712# OBSESS OVER HOST CHECKS OPTION
713# This determines whether or not Naemon will obsess over host
714# checks and run the ochp_command defined below. Unless you're
715# planning on implementing distributed monitoring, do not enable
716# this option. Read the HTML docs for more information on
717# implementing distributed monitoring.
718# Values: 1 = obsess over hosts, 0 = do not obsess (default)
719
720obsess_over_hosts=0
721
722
723
724# OBSESSIVE COMPULSIVE HOST PROCESSOR COMMAND
725# This is the command that is run for every host check that is
726# processed by Naemon. This command is executed only if the
727# obsess_over_hosts option (above) is set to 1. The command
728# argument is the short name of a command definition that you
729# define in your host configuration file. Read the HTML docs for
730# more information on implementing distributed monitoring.
731
732#ochp_command=somecommand
733
734
735
736# TRANSLATE PASSIVE HOST CHECKS OPTION
737# This determines whether or not Naemon will translate
738# DOWN/UNREACHABLE passive host check results into their proper
739# state for this instance of Naemon. This option is useful
740# if you have distributed or failover monitoring setup. In
741# these cases your other Naemon servers probably have a different
742# "view" of the network, with regards to the parent/child relationship
743# of hosts. If a distributed monitoring server thinks a host
744# is DOWN, it may actually be UNREACHABLE from the point of
745# this Naemon instance. Enabling this option will tell Naemon
746# to translate any DOWN or UNREACHABLE host states it receives
747# passively into the correct state from the view of this server.
748# Values: 1 = perform translation, 0 = do not translate (default)
749
750translate_passive_host_checks=0
751
752
753
754# PASSIVE HOST CHECKS ARE SOFT OPTION
755# This determines whether or not Naemon will treat passive host
756# checks as being HARD or SOFT. By default, a passive host check
757# result will put a host into a HARD state type. This can be changed
758# by enabling this option.
759# Values: 0 = passive checks are HARD, 1 = passive checks are SOFT
760
761passive_host_checks_are_soft=0
762
763
764
765# ORPHANED HOST/SERVICE CHECK OPTIONS
766# These options determine whether or not Naemon will periodically
767# check for orphaned host and service checks. Since service checks are
768# not rescheduled until the results of their previous execution
769# instance are processed, there exists a possibility that some
770# checks may never get rescheduled. A similar situation exists for
771# host checks, although the exact scheduling details differ a bit
772# from service checks. Orphaned checks seem to be a rare
773# problem and should not happen under normal circumstances.
774# If you have problems with service checks never getting
775# rescheduled, make sure you have orphaned service checks enabled.
776# Values: 1 = enable checks, 0 = disable checks
777
778check_for_orphaned_services=1
779check_for_orphaned_hosts=1
780
781
782
783# SERVICE FRESHNESS CHECK OPTION
784# This option determines whether or not Naemon will periodically
785# check the "freshness" of service results. Enabling this option
786# is useful for ensuring passive checks are received in a timely
787# manner.
788# Values: 1 = enabled freshness checking, 0 = disable freshness checking
789
790check_service_freshness=1
791
792
793
794# SERVICE FRESHNESS CHECK INTERVAL
795# This setting determines how often (in seconds) Naemon will
796# check the "freshness" of service check results. If you have
797# disabled service freshness checking, this option has no effect.
798
799service_freshness_check_interval=60
800
801
802
803# SERVICE CHECK TIMEOUT STATE
804# This setting determines the state Naemon will report when a
805# service check times out - that is, does not respond within
806# service_check_timeout seconds. This can be useful if a
807# machine is running at too high a load and you do not want
808# to consider a failed service check to be critical (the default).
809# Valid settings are:
810# c - Critical (default)
811# u - Unknown
812# w - Warning
813# o - OK
814
815service_check_timeout_state=c
816
817
818
819# HOST FRESHNESS CHECK OPTION
820# This option determines whether or not Naemon will periodically
821# check the "freshness" of host results. Enabling this option
822# is useful for ensuring passive checks are received in a timely
823# manner.
824# Values: 1 = enabled freshness checking, 0 = disable freshness checking
825
826check_host_freshness=0
827
828
829
830# HOST FRESHNESS CHECK INTERVAL
831# This setting determines how often (in seconds) Naemon will
832# check the "freshness" of host check results. If you have
833# disabled host freshness checking, this option has no effect.
834
835host_freshness_check_interval=60
836
837
838
839
840# ADDITIONAL FRESHNESS THRESHOLD LATENCY
841# This setting determines the number of seconds that Naemon
842# will add to any host and service freshness thresholds that
843# it calculates (those not explicitly specified by the user).
844
845additional_freshness_latency=15
846
847
848
849
850# FLAP DETECTION OPTION
851# This option determines whether or not Naemon will try
852# and detect hosts and services that are "flapping".
853# Flapping occurs when a host or service changes between
854# states too frequently. When Naemon detects that a
855# host or service is flapping, it will temporarily suppress
856# notifications for that host/service until it stops
857# flapping. Flap detection is very experimental, so read
858# the HTML documentation before enabling this feature!
859# Values: 1 = enable flap detection
860# 0 = disable flap detection (default)
861
862enable_flap_detection=1
863
864
865
866# FLAP DETECTION THRESHOLDS FOR HOSTS AND SERVICES
867# Read the HTML documentation on flap detection for
868# an explanation of what this option does. This option
869# has no effect if flap detection is disabled.
870
871low_service_flap_threshold=5.0
872high_service_flap_threshold=20.0
873low_host_flap_threshold=5.0
874high_host_flap_threshold=20.0
875
876
877
878# DATE FORMAT OPTION
879# This option determines how short dates are displayed. Valid options
880# include:
881# us (MM-DD-YYYY HH:MM:SS)
882# euro (DD-MM-YYYY HH:MM:SS)
883# iso8601 (YYYY-MM-DD HH:MM:SS)
884# strict-iso8601 (YYYY-MM-DDTHH:MM:SS)
885#
886
887date_format=iso8601
888
889
890
891
892# TIMEZONE OFFSET
893# This option is used to override the default timezone that this
894# instance of Naemon runs in. If not specified, Naemon will use
895# the system configured timezone.
896
897#use_timezone=US/Mountain
898#use_timezone=Australia/Brisbane
899
900
901
902# ILLEGAL OBJECT NAME CHARACTERS
903# This option allows you to specify illegal characters that cannot
904# be used in host names, service descriptions, or names of other
905# object types.
906
907illegal_object_name_chars=`~!$%^&*|'"<>?,()=
908
909
910
911# ILLEGAL MACRO OUTPUT CHARACTERS
912# This option allows you to specify illegal characters that are
913# stripped from macros before being used in notifications, event
914# handlers, etc. This DOES NOT affect macros used in service or
915# host check commands.
916# The following macros are stripped of the characters you specify:
917# $HOSTOUTPUT$
918# $HOSTPERFDATA$
919# $HOSTACKAUTHOR$
920# $HOSTACKCOMMENT$
921# $SERVICEOUTPUT$
922# $SERVICEPERFDATA$
923# $SERVICEACKAUTHOR$
924# $SERVICEACKCOMMENT$
925
926illegal_macro_output_chars=`~$&|'"<>
927
928
929
930# REGULAR EXPRESSION MATCHING
931# This option controls whether or not regular expression matching
932# takes place in the object config files. Regular expression
933# matching is used to match host, hostgroup, service, and service
934# group names/descriptions in some fields of various object types.
935# Values: 1 = enable regexp matching, 0 = disable regexp matching
936
937use_regexp_matching=0
938
939
940
941# "TRUE" REGULAR EXPRESSION MATCHING
942# This option controls whether or not "true" regular expression
943# matching takes place in the object config files. This option
944# only has an effect if regular expression matching is enabled
945# (see above). If this option is DISABLED, regular expression
946# matching only occurs if a string contains wildcard characters
947# (* and ?). If the option is ENABLED, regexp matching occurs
948# all the time (which can be annoying).
949# Values: 1 = enable true matching, 0 = disable true matching
950
951use_true_regexp_matching=0
952
953
954
955# ADMINISTRATOR EMAIL/PAGER ADDRESSES
956# The email and pager address of a global administrator (likely you).
957# Naemon never uses these values itself, but you can access them by
958# using the $ADMINEMAIL$ and $ADMINPAGER$ macros in your notification
959# commands.
960
961admin_email=naemon@localhost
962admin_pager=pagenaemon@localhost
963
964
965
966# DEBUG LEVEL
967# This option determines how much (if any) debugging information will
968# be written to the debug file. OR values together to log multiple
969# types of information.
970# Values:
971# -1 = Everything
972# 0 = Nothing
973# 1 = Functions
974# 2 = Configuration
975# 4 = Process information
976# 8 = Scheduled events
977# 16 = Host/service checks
978# 32 = Notifications
979# 64 = Event broker
980# 128 = External commands
981# 256 = Commands
982# 512 = Scheduled downtime
983# 1024 = Comments
984# 2048 = Macros
985
986debug_level=0
987
988
989
990# DEBUG VERBOSITY
991# This option determines how verbose the debug log output will be.
992# Values: 0 = Brief output
993# 1 = More detailed
994# 2 = Very detailed
995
996debug_verbosity=1
997
998
999
1000# DEBUG FILE
1001# This option determines where Naemon should write debugging information.
1002
1003debug_file=/var/lib/naemon/naemon.debug
1004
1005
1006
1007# MAX DEBUG FILE SIZE
1008# This option determines the maximum size (in bytes) of the debug file. If
1009# the file grows larger than this size, it will be renamed with a .old
1010# extension. If a file already exists with a .old extension it will
1011# automatically be deleted. This helps ensure your disk space usage doesn't
1012# get out of control when debugging Naemon.
1013
1014max_debug_file_size=1000000
1015
1016
1017
1018# Whether to allow hostgroups to have no hosts. This defaults to off, since
1019# that was the old behavior.
1020
1021allow_empty_hostgroup_assignment=0
1022
1023
1024
1025# Normally worker count is dynamically allocated based on 1.5 * number of cpu's
1026# with a minimum of 4 workers. This value will override the defaults
1027
1028#check_workers=3
1029
1030# CIRCULAR DEPENDENCIES (EXPERIMENTAL)
1031# Allow for circular dependencies in naemon's host graph.
1032# Enabling this will cause propagation of the following to stop working:
1033# * scheduling downtime
1034# * enabling notification
1035# * disabling notification
1036# This feature is experimental and bugs might occur.
1037
1038allow_circular_dependencies=0
diff --git a/modules/profile/templates/monitoring/resource.cfg.erb b/modules/profile/templates/monitoring/resource.cfg.erb
new file mode 100644
index 0000000..5a5c3ee
--- /dev/null
+++ b/modules/profile/templates/monitoring/resource.cfg.erb
@@ -0,0 +1,30 @@
1###########################################################################
2#
3# RESOURCE.CFG - Sample Resource File for Naemon 1.0.7
4#
5#
6# You can define $USERx$ macros in this file, which can in turn be used
7# in command definitions in your host config file(s). $USERx$ macros are
8# useful for storing sensitive information such as usernames, passwords,
9# etc. They are also handy for specifying the path to plugins and
10# event handlers - if you decide to move the plugins or event handlers to
11# a different directory in the future, you can just update one or two
12# $USERx$ macros, instead of modifying a lot of command definitions.
13#
14# Naemon supports up to 256 $USERx$ macros ($USER1$ through $USER256$)
15#
16# Resource files may also be used to store configuration directives for
17# external data sources like MySQL...
18#
19###########################################################################
20
21# Sets $USER1$ to be the path to the plugins
22$USER1$=/usr/lib/monitoring-plugins
23$USER2$=<%= @plugins %>
24
25# Sample (disabled): $USER2$ as the path to event handlers; $USER2$ is already set above
26#$USER2$=/usr/lib/monitoring-plugins/eventhandlers
27
28# Store some usernames and passwords (hidden from the CGIs)
29#$USER3$=someuser
30#$USER4$=somepassword
diff --git a/modules/profile/templates/monitoring/send_nrdp.sh.erb b/modules/profile/templates/monitoring/send_nrdp.sh.erb
new file mode 100755
index 0000000..41f58e5
--- /dev/null
+++ b/modules/profile/templates/monitoring/send_nrdp.sh.erb
@@ -0,0 +1,271 @@
1#!/bin/bash
2#
3# send_nrdp.sh
4#
5# Copyright (c) 2010-2017 - Nagios Enterprises, LLC.
6# Written by: Scott Wilkerson (nagios@nagios.org)
7#
8# 2017-09-25 Troy Lea aka BOX293
9# - Fixed script not working with arguments when run as a cron job
10# or if being used as a nagios command like obsessive compulsive.
11# ... "if [ ! -t 0 ]" was the reason why.
12# 2017-12-08 Jørgen van der Meulen (Conclusion Xforce)
13# - Fixed typo in NRDP abbreviation
14
15
# Script name (shown in usage/help/error text) and release string (-v).
# "$0" is quoted so an install path containing spaces still yields a name.
PROGNAME=$(basename "$0")
RELEASE="Revision 0.6.1"
18
# Print the release string held in $RELEASE (used by the -v option).
print_release() {
    echo "$RELEASE"
}
22
# Print the short usage banner to stdout.
# Reads $PROGNAME and $RELEASE; the blank lines around each entry are part of
# the banner, so the heredoc starts and ends with an empty line.
print_usage() {
    cat <<EOF

$PROGNAME $RELEASE - Send NRDP script for Nagios

Usage: send_nrdp.sh -u URL -t token [options]

Usage: $PROGNAME -h display help

EOF
}
32
# Print the usage banner followed by the full option reference, then exit 0.
# Any argument passed by the caller is ignored. The text is emitted through a
# single unquoted heredoc: $PROGNAME is expanded, while "\t" stays literal.
# The Directory section names -D (the spool directory option handled by the
# option parser), not -d, which is the STDIN delimiter.
print_help() {
    print_usage
    cat <<EOF

This script is used to send NRDP data to a Nagios server

Required:
 -u, URL of NRDP server. Usually http://<IP_ADDRESS>/nrdp/
 -t, Shared token. Must be the same token set in NRDP Server

Options:
 Single Check:
 -H host name
 -s service name
 -S State
 -o output

 STDIN:
 [-d delimiter] (default -d "\t")
 With only the required parameters $PROGNAME is capable of
 processing data piped to it either from a file or other
 process. By default, we use \t as the delimiter however this
 may be specified with the -d option data should be in the
 following formats one entry per line.
 For Host checks:
 hostname State output
 For Service checks
 hostname servicename State output

 File:
 -f /full/path/to/file
 This file will be sent to the NRDP server specified in -u
 The file should be an XML file in the following format
 ##################################################

 <?xml version='1.0'?>
 <checkresults>
 <checkresult type="host" checktype="1">
 <hostname>YOUR_HOSTNAME</hostname>
 <state>0</state>
 <output>OK|perfdata=1.00;5;10;0</output>
 </checkresult>
 <checkresult type="service" checktype="1">
 <hostname>YOUR_HOSTNAME</hostname>
 <servicename>YOUR_SERVICENAME</servicename>
 <state>0</state>
 <output>OK|perfdata=1.00;5;10;0</output>
 </checkresult>
 </checkresults>
 ##################################################

 Directory:
 -D /path/to/temp/dir
 This is a directory that contains XML files in the format
 above. Additionally, if the -D flag is specified, $PROGNAME
 will create temp files here if the server could not be reached.
 On additional calls with the same -D path, if a connection to
 the server is successful, all temp files will be sent.
EOF
    exit 0
}
92
# Keep an undelivered payload in the spool directory ($directory, from -D) so
# a later run can retry it. Creates the directory when it does not exist.
#   $1 - XML payload to store
_spool_payload() {
    [ -d "$directory" ] || mkdir -p "$directory"
    printf '%s\n' "$1" > "$(mktemp "$directory/nrdp.XXXXXX")"
}

# POST one batch of check results to the NRDP server at $url using $token.
#   $1 - XML payload (sent inline unless $file is set, in which case curl
#        reads and url-encodes the file itself)
#   $2 - optional path of a spooled file being replayed; it is removed once
#        the server accepts it, and is never spooled a second time
# Exits the script with status 1 when curl fails and 2 when the server's
# <status> is not 0; in both cases a fresh payload is spooled if -D was given.
send_data() {
    local payload=$1 spooled_file=$2
    local pdata rslt ret flat status message

    pdata="token=$token&cmd=submitcheck"
    # NOTE(review): --insecure turns off TLS certificate verification for the
    # NRDP endpoint; kept as in the original, confirm this is intended.
    if [ -n "$file" ]; then
        # Arguments are passed individually (not through an unquoted variable)
        # so a file path containing spaces survives.
        rslt=$(curl -f --silent --insecure -d "$pdata" --data-urlencode "XMLDATA@$file" "$url/")
        ret=$?
    else
        rslt=$(curl -f --silent --insecure -d "$pdata&XMLDATA=$payload" "$url/")
        ret=$?
    fi

    # Flatten the response to one line so the sed captures work on multi-line
    # replies without relying on unquoted expansion (which would also glob).
    flat=$(printf '%s' "$rslt" | tr -s '[:space:]' ' ')
    status=$(printf '%s\n' "$flat" | sed -n 's|.*<status>\(.*\)</status>.*|\1|p')
    message=$(printf '%s\n' "$flat" | sed -n 's|.*<message>\(.*\)</message>.*|\1|p')

    if [ "$ret" -ne 0 ]; then
        echo "ERROR: could not connect to NRDP server at $url"
        # Only spool fresh submissions; a replayed file is already on disk.
        if [ -z "$spooled_file" ] && [ -n "$directory" ]; then
            _spool_payload "$payload"
        fi
        exit 1
    fi

    if [ "$status" != "0" ]; then
        # The server was reached but did not report status 0.
        echo "ERROR: The NRDP Server said $message"
        if [ -z "$spooled_file" ] && [ -n "$directory" ]; then
            _spool_payload "$payload"
        fi
        exit 2
    fi

    # Delivery succeeded: a replayed spool file is no longer needed.
    if [ -n "$spooled_file" ]; then
        rm -f "$spooled_file"
    fi
}
146
# Parse parameters. The URL and token defaults are rendered by the ERB
# template from @naemon_url / @naemon_token; -u and -t override them.
url="<%= @naemon_url %>"
token="<%= @naemon_token %>"

while getopts "u:t:H:s:S:o:f:d:c:D:hv" option
do
    case $option in
        u) url=$OPTARG ;;
        t) token=$OPTARG ;;
        H) host=$OPTARG ;;
        s) service=$OPTARG ;;
        S) State=$OPTARG ;;
        o) output=$OPTARG ;;
        f) file=$OPTARG ;;
        d) delim=$OPTARG ;;
        c) checktype=$OPTARG ;;
        D) directory=$OPTARG ;;
        h) print_help 0 ;;
        v) print_release
           exit 0 ;;
    esac
done

# Defaults for options that were not given: checktype 1, tab as delimiter.
if [ -z "$checktype" ]; then
    checktype=1
fi
if [ -z "$delim" ]; then
    delim=$'\t'
fi

if [ -z "$url" ] || [ -z "$token" ]; then
    echo "Usage: send_nrdp -u url -t token"
    exit 1
fi

# curl is the only transport send_data uses, so it is the only hard requirement.
if ! command -v curl >/dev/null 2>&1; then
    echo "curl is required to run $PROGNAME"
    exit 1
fi

checkcount=0

# Single check given on the command line (-H, -S and optionally -s / -o).
if [ -n "$host" ]; then
    xml=""
    # we are not getting piped results
    if [ -z "$State" ]; then
        echo "You must provide a host -H and State -S"
        exit 2
    fi
    if [ -n "$service" ]; then
        xml="$xml<checkresult type='service' checktype='$checktype'><servicename>$service</servicename>"
    else
        xml="$xml<checkresult type='host' checktype='$checktype'>"
    fi

    # urlencode XML special chars
    output=${output//&/%26}
    output=${output//</%3C}
    output=${output//>/%3E}

    xml="$xml<hostname>$host</hostname><state>$State</state><output><![CDATA[$output]]></output></checkresult>"
    checkcount=1
fi

# If only url and token have been provided then it is assumed that data is
# being piped on stdin, one check per line, fields separated by $delim.
if [[ -z $host && -z $State && -z $file && -z $directory ]]; then
    xml=""
    # IFS is set for the read only, so the shell-wide IFS is left untouched;
    # read -a splits without globbing the fields. The "|| ..." clause keeps a
    # final line that has no trailing newline.
    while IFS=$delim read -r -a arr || [ "${#arr[@]}" -gt 0 ]; do
        nfields=${#arr[@]}
        if [ "$nfields" -eq 0 ]; then
            continue
        fi
        # Numeric comparison: < and > inside [[ ]] would compare as strings.
        if [ "$nfields" -lt 3 ] || [ "$nfields" -gt 4 ]; then
            echo "ERROR: STDIN must be either 3 or 4 fields long, I found $nfields"
            continue
        fi

        if [ "$nfields" -eq 4 ]; then
            xml="$xml<checkresult type='service' checktype='$checktype'>
 <servicename>${arr[1]}</servicename>
 <hostname>${arr[0]}</hostname>
 <state>${arr[2]}</state>
 <output>${arr[3]}</output>"
        else
            xml="$xml<checkresult type='host' checktype='$checktype'>
 <hostname>${arr[0]}</hostname>
 <state>${arr[1]}</state>
 <output>${arr[2]}</output>"
        fi

        xml="$xml</checkresult>"
        checkcount=$((checkcount + 1))
    done
fi

# -f: send a ready-made XML file.
if [ -n "$file" ]; then
    xml=$(cat "$file")
    send_data "$xml"
fi

# -D: replay every file found in the spool directory.
if [ -n "$directory" ]; then
    for f in "$directory"/*; do
        # Skips sub-directories and the unexpanded pattern of an empty directory.
        [ -f "$f" ] || continue
        xml=$(cat "$f")
        send_data "$xml" "$f"
    done
fi

# NOTE(review): a check built from -H/-S or stdin is only sent when neither
# -f nor -D was given, so "-H ... -D dir" replays the spool but does not send
# the new check - confirm this is the intended behaviour.
if [ -z "$file" ] && [ -z "$directory" ]; then
    xml="<?xml version='1.0'?><checkresults>$xml</checkresults>"
    send_data "$xml"
    echo "Sent $checkcount checks to $url"
fi
diff --git a/modules/role/manifests/backup.pp b/modules/role/manifests/backup.pp
index b35c542..6b8d00c 100644
--- a/modules/role/manifests/backup.pp
+++ b/modules/role/manifests/backup.pp
@@ -14,6 +14,7 @@ class role::backup (
14 include "profile::xmr_stak" 14 include "profile::xmr_stak"
15 include "profile::known_hosts" 15 include "profile::known_hosts"
16 include "profile::boinc" 16 include "profile::boinc"
17 include "profile::monitoring"
17 18
18 include "role::backup::postgresql" 19 include "role::backup::postgresql"
19 20
@@ -124,5 +125,11 @@ class role::backup (
124 order => "$order_dirname-$order_part", 125 order => "$order_dirname-$order_part",
125 } 126 }
126 } 127 }
128
129 @profile::monitoring::local_service { "Last backup in $base is not too old":
130 local => {
131 check_command => "check_last_file_date!$base!14",
132 }
133 }
127 } 134 }
128} 135}
diff --git a/modules/role/manifests/caldance.pp b/modules/role/manifests/caldance.pp
index 75d9dbd..b7948e6 100644
--- a/modules/role/manifests/caldance.pp
+++ b/modules/role/manifests/caldance.pp
@@ -6,6 +6,7 @@ class role::caldance (
6 include "profile::postgresql" 6 include "profile::postgresql"
7 include "profile::apache" 7 include "profile::apache"
8 include "profile::redis" 8 include "profile::redis"
9 include "profile::monitoring"
9 10
10 ensure_packages(["python-pip", "python-virtualenv", "python-django"]) 11 ensure_packages(["python-pip", "python-virtualenv", "python-django"])
11} 12}
diff --git a/modules/role/manifests/cryptoportfolio.pp b/modules/role/manifests/cryptoportfolio.pp
index c675e91..8f7bfca 100644
--- a/modules/role/manifests/cryptoportfolio.pp
+++ b/modules/role/manifests/cryptoportfolio.pp
@@ -41,7 +41,6 @@ class role::cryptoportfolio (
41 contain "role::cryptoportfolio::bot" 41 contain "role::cryptoportfolio::bot"
42 } 42 }
43 43
44 # FIXME: restore backup
45 unless empty($front_version) { 44 unless empty($front_version) {
46 contain "role::cryptoportfolio::front" 45 contain "role::cryptoportfolio::front"
47 } 46 }
diff --git a/modules/role/manifests/etherpad.pp b/modules/role/manifests/etherpad.pp
index 5ab5023..119af56 100644
--- a/modules/role/manifests/etherpad.pp
+++ b/modules/role/manifests/etherpad.pp
@@ -22,6 +22,7 @@ class role::etherpad (
22 include "profile::tools" 22 include "profile::tools"
23 include "profile::postgresql" 23 include "profile::postgresql"
24 include "profile::apache" 24 include "profile::apache"
25 include "profile::monitoring"
25 26
26 ensure_packages(["npm"]) 27 ensure_packages(["npm"])
27 ensure_packages(["abiword"]) 28 ensure_packages(["abiword"])
@@ -121,4 +122,17 @@ class role::etherpad (
121 proxy_preserve_host => true; 122 proxy_preserve_host => true;
122 default: * => $::profile::apache::apache_vhost_default; 123 default: * => $::profile::apache::apache_vhost_default;
123 } 124 }
125
126 @profile::monitoring::external_service { "Etherpad service is running on $web_host":
127 type => "web",
128 master => {
129 check_command => "check_https!$web_host!/!<title>Etherpad"
130 }
131 }
132 @profile::monitoring::external_service { "$web_host ssl certificate is up to date":
133 type => "web",
134 master => {
135 check_command => "check_https_certificate!$web_host"
136 }
137 }
124} 138}
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..74bb294
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,31 @@
1# Puppet configuration repository for immae.eu's services
2
3This repository aims to help automate the installation of servers
4planned for a specific task, with the help of Puppet. The hosts are
5supposed to be listed in an LDAP-like database, which will contain the
6necessary credentials, variable configuration and secrets for each
7server.
8
9## Structure
10
11The repository is structured along Puppet modules (`modules/`
12directory). Each machine has one or several `role`s, which determine the
13set of programs and configuration to install. Each role may be
14standalone, or require a set of `profile`s, each of which is seen as a
15reusable component. (The structure is inspired by the tutorial at
16[https://www.craigdunn.org/2012/05/239/](https://www.craigdunn.org/2012/05/239/) )
17
18
19## TODO
20
21- Complete documentation
22- Add some monitoring:
23 - modules/profile/manifests/postgresql/ssl.pp (check postgresql certificate)
24 - modules/profile/manifests/postgresql/backup\_pgbouncer.pp (check pgbouncer works)
25 - modules/profile/manifests/mail.pp (check e-mails are going through)
26 - modules/profile/manifests/redis.pp (check redis is running)
27 - modules/role/manifests/cryptoportfolio (role-specific checks)
28- Add redis replication and dumps
29- Restore backups for cryptoportfolio
30- Ensure latest by default for packages
31- try to do a mkfs.ext4 for cloud vps
diff --git a/scripts/ovh_cloud_instance/arch_host_script.sh b/scripts/ovh_cloud_instance/arch_host_script.sh
index 42dcc4a..378b0be 100755
--- a/scripts/ovh_cloud_instance/arch_host_script.sh
+++ b/scripts/ovh_cloud_instance/arch_host_script.sh
@@ -25,7 +25,7 @@ PART="/dev/disk/by-uuid/$UUID"
25# mkfs.ext4 -F -U "$UUID" "$DEVICE" 25# mkfs.ext4 -F -U "$UUID" "$DEVICE"
26sudo mount "$DEVICE" /mnt 26sudo mount "$DEVICE" /mnt
27 27
28##### FIXME: mkfs.ext4 would be better #### 28##### mkfs.ext4 would be better ####
29for i in /mnt/*; do 29for i in /mnt/*; do
30 if [ "$i" = "/mnt/boot" ]; then 30 if [ "$i" = "/mnt/boot" ]; then
31 # keep /boot/grub 31 # keep /boot/grub
@@ -34,7 +34,7 @@ for i in /mnt/*; do
34 sudo rm -rf $i 34 sudo rm -rf $i
35 fi 35 fi
36done 36done
37##### /FIXME #### 37##### / ####
38 38
39sudo pacstrap -G /mnt base git puppet 39sudo pacstrap -G /mnt base git puppet
40 40
diff --git a/scripts/ovh_vps_ssd/arch_chroot_script.sh b/scripts/ovh_vps_ssd/arch_chroot_script.sh
index 7b7887f..57e793b 100755
--- a/scripts/ovh_vps_ssd/arch_chroot_script.sh
+++ b/scripts/ovh_vps_ssd/arch_chroot_script.sh
@@ -10,7 +10,7 @@ DEVICE=$(realpath "$PART")
10# mkfs.ext4 -F -U "$UUID" "$DEVICE" 10# mkfs.ext4 -F -U "$UUID" "$DEVICE"
11mount "$DEVICE" /mnt 11mount "$DEVICE" /mnt
12 12
13##### FIXME: mkfs.ext4 would be better #### 13##### mkfs.ext4 would be better ####
14for i in /mnt/*; do 14for i in /mnt/*; do
15 if [ "$i" = "/mnt/boot" ]; then 15 if [ "$i" = "/mnt/boot" ]; then
16 # keep /boot/grub 16 # keep /boot/grub
@@ -19,7 +19,7 @@ for i in /mnt/*; do
19 rm -rf $i 19 rm -rf $i
20 fi 20 fi
21done 21done
22##### /FIXME #### 22##### / ####
23 23
24pacstrap -G /mnt base git puppet 24pacstrap -G /mnt base git puppet
25 25