Merge branch 'monitoring' into dev

author Ismaël Bouya <ismael.bouya@normalesup.org>

Wed, 11 Jul 2018 07:31:24 +0000 (09:31 +0200)

committer Ismaël Bouya <ismael.bouya@normalesup.org>

Wed, 11 Jul 2018 07:31:24 +0000 (09:31 +0200)
author Ismaël Bouya <ismael.bouya@normalesup.org>
Wed, 11 Jul 2018 07:31:24 +0000 (09:31 +0200)
committer Ismaël Bouya <ismael.bouya@normalesup.org>
Wed, 11 Jul 2018 07:31:24 +0000 (09:31 +0200)
diff --git a/modules/base_installation/lib/puppet/provider/package/pacman.rb b/modules/base_installation/lib/puppet/provider/package/pacman.rb

new file mode 100644 (file)

index 0000000..0a5e5d0
--- /dev/null
+++ b/modules/base_installation/lib/puppet/provider/package/pacman.rb
@@ -0,0 +1,283 @@
+require 'puppet/provider/package'
+require 'set'
+require 'uri'
+
+Puppet::Type.type(:package).provide :pacman, :parent => Puppet::Provider::Package do
+  desc "Support for the Package Manager Utility (pacman) used in Archlinux.
+
+  This provider supports the `install_options` attribute, which allows command-line flags to be passed to pacman.
+  These options should be specified as a string (e.g. '--flag'), a hash (e.g. {'--flag' => 'value'}),
+  or an array where each element is either a string or a hash."
+
+  # If aura is installed, we can make use of it
+  def self.aura?
+    @aura ||= Puppet::FileSystem.exist?('/usr/bin/aura')
+  end
+
+  commands :pacman => "/usr/bin/pacman"
+  # Aura is a common AUR helper which, if installed, we can use to query the AUR
+  commands :aura => "/usr/bin/aura" if aura?
+
+  confine     :operatingsystem => [:archlinux, :manjarolinux]
+  defaultfor  :operatingsystem => [:archlinux, :manjarolinux]
+  has_feature :install_options
+  has_feature :uninstall_options
+  has_feature :upgradeable
+  has_feature :virtual_packages
+
+  # Checks if a given name is a group
+  def self.group?(name)
+    begin
+      !pacman("-Sg", name).empty?
+    rescue Puppet::ExecutionFailure
+      # pacman returns an expected non-zero exit code when the name is not a group
+      false
+    end
+  end
+
+  # Install a package using 'pacman', or 'aura' if available.
+  # Installs quietly, without confirmation or progress bar, updates package
+  # list from servers defined in pacman.conf.
+  def install
+    if @resource[:source]
+      install_from_file
+    else
+      install_from_repo
+    end
+
+    unless self.query
+      fail(_("Could not find package '%{name}'") % { name: @resource[:name] })
+    end
+  end
+
+  # Fetch the list of packages and package groups that are currently installed on the system.
+  # Only package groups that are fully installed are included. If a group adds packages over time, it will not
+  # be considered as fully installed any more, and we would install the new packages on the next run.
+  # If a group removes packages over time, nothing will happen. This is intended.
+  def self.instances
+    instances = []
+
+    # Get the installed packages
+    installed_packages = get_installed_packages
+    installed_packages.sort_by { |k, _| k }.each do |package, version|
+      instances << new(to_resource_hash(package, version))
+    end
+
+    # Get the installed groups
+    get_installed_groups(installed_packages).each do |group, version|
+      instances << new(to_resource_hash(group, version))
+    end
+
+    instances
+  end
+
+  # returns a hash package => version of installed packages
+  def self.get_installed_packages
+    begin
+      packages = {}
+      execpipe([command(:pacman), "-Q"]) do |pipe|
+        # pacman -Q output is 'packagename version-rel'
+        regex = %r{^(\S+)\s(\S+)}
+        pipe.each_line do |line|
+          if match = regex.match(line)
+            packages[match.captures[0]] = match.captures[1]
+          else
+            warning(_("Failed to match line '%{line}'") % { line: line })
+          end
+        end
+      end
+      packages
+    rescue Puppet::ExecutionFailure
+      fail(_("Error getting installed packages"))
+    end
+  end
+
+  # returns a hash of group => version of installed groups
+  def self.get_installed_groups(installed_packages, filter = nil)
+    groups = {}
+    begin
+      # Build a hash of group name => list of packages
+      command = [command(:pacman), "-Sgg"]
+      command << filter if filter
+      execpipe(command) do |pipe|
+        pipe.each_line do |line|
+          name, package = line.split
+          packages = (groups[name] ||= [])
+          packages << package
+        end
+      end
+
+      # Remove any group that doesn't have all its packages installed
+      groups.delete_if do |_, packages|
+        !packages.all? { |package| installed_packages[package] }
+      end
+
+      # Replace the list of packages with a version string consisting of packages that make up the group
+      groups.each do |name, packages|
+        groups[name] = packages.sort.map {|package| "#{package} #{installed_packages[package]}"}.join ', '
+      end
+    rescue Puppet::ExecutionFailure
+      # pacman returns an expected non-zero exit code when the filter name is not a group
+      raise unless filter
+    end
+    groups
+  end
+
+  # Because Archlinux is a rolling release based distro, installing a package
+  # should always result in the newest release.
+  def update
+    # Install in pacman can be used for update, too
+    self.install
+  end
+
+  # We rescue the main check from Pacman with a check on the AUR using aura, if installed
+  def latest
+    # Synchronize the database
+    pacman "-Sy"
+
+    resource_name = @resource[:name]
+
+    # If target is a group, construct the group version
+    return pacman("-Sp", "--print-format", "%n %v", resource_name).lines.map{ |line| line.chomp }.sort.join(', ') if self.class.group?(resource_name)
+
+    # Start by querying with pacman first
+    # If that fails, retry using aura against the AUR
+    pacman_check = true
+    begin
+      if pacman_check
+        output = pacman "-Sp", "--print-format", "%v", resource_name
+        return output.chomp
+      else
+        output = aura "-Ai", resource_name
+        output.split("\n").each do |line|
+          return line.split[2].chomp if line.split[0] =~ /Version/
+        end
+      end
+    rescue Puppet::ExecutionFailure
+      if pacman_check and self.class.aura?
+        pacman_check = false # now try the AUR
+        retry
+      else
+        raise
+      end
+    end
+  end
+
+  # Queries information for a package or package group
+  def query
+    installed_packages = self.class.get_installed_packages
+    resource_name = @resource[:name]
+
+    # Check for the resource being a group
+    version = self.class.get_installed_groups(installed_packages, resource_name)[resource_name]
+
+    if version
+      unless @resource.allow_virtual?
+        warning(_("%{resource_name} is a group, but allow_virtual is false.") % { resource_name: resource_name })
+        return nil
+      end
+    else
+      version = installed_packages[resource_name]
+    end
+
+    # Return nil if no package or group found
+    return nil unless version
+
+    self.class.to_resource_hash(resource_name, version)
+  end
+
+  def self.to_resource_hash(name, version)
+    {
+      :name     => name,
+      :ensure   => version,
+      :provider => self.name
+    }
+  end
+
+  # Removes a package from the system.
+  def uninstall
+    resource_name = @resource[:name]
+
+    is_group = self.class.group?(resource_name)
+
+    fail(_("Refusing to uninstall package group %{resource_name}, because allow_virtual is false.") % { resource_name: resource_name }) if is_group && !@resource.allow_virtual?
+
+    cmd = %w{--noconfirm --noprogressbar}
+    cmd += uninstall_options if @resource[:uninstall_options]
+    cmd << "-R"
+    cmd << '-s' if is_group
+    cmd << resource_name
+
+    if self.class.aura?
+      aura(*cmd)
+    else
+      pacman(*cmd)
+    end
+  end
+
+  private
+
+  def install_with_aura?
+    resource_name = @resource[:name]
+    if !self.class.aura?
+      return false
+    end
+
+    begin
+      pacman "-Sp", resource_name
+      return false
+    rescue Puppet::ExecutionFailure
+      return true
+    end
+  end
+
+  def install_options
+    join_options(@resource[:install_options])
+  end
+
+  def uninstall_options
+    join_options(@resource[:uninstall_options])
+  end
+
+  def install_from_file
+    source = @resource[:source]
+    begin
+      source_uri = URI.parse source
+    rescue => detail
+      self.fail Puppet::Error, _("Invalid source '%{source}': %{detail}") % { source: source, detail: detail }, detail
+    end
+
+    source = case source_uri.scheme
+    when nil then source
+    when /https?/i then source
+    when /ftp/i then source
+    when /file/i then source_uri.path
+    when /puppet/i
+      fail _("puppet:// URL is not supported by pacman")
+    else
+      fail _("Source %{source} is not supported by pacman") % { source: source }
+    end
+    pacman "--noconfirm", "--noprogressbar", "-Sy"
+    pacman "--noconfirm", "--noprogressbar", "-U", source
+  end
+
+  def install_from_repo
+    resource_name = @resource[:name]
+
+    # Refuse to install if not allowing virtual packages and the resource is a group
+    fail(_("Refusing to install package group %{resource_name}, because allow_virtual is false.") % { resource_name: resource_name }) if self.class.group?(resource_name) && !@resource.allow_virtual?
+
+    cmd = %w{--noconfirm --needed}
+    cmd += install_options if @resource[:install_options]
+
+    if install_with_aura?
+      cmd << "-Aq" << resource_name
+      aura(*cmd)
+    else
+      cmd << "--noprogressbar"
+      cmd << "-Sy" << resource_name
+      pacman(*cmd)
+    end
+  end
+
+end
diff --git a/modules/base_installation/lib/puppet/provider/package/pip2.rb b/modules/base_installation/lib/puppet/provider/package/pip2.rb

new file mode 100644 (file)

index 0000000..27cc0c4
--- /dev/null
+++ b/modules/base_installation/lib/puppet/provider/package/pip2.rb
@@ -0,0 +1,17 @@
+require 'puppet/provider/package/pip'
+
+Puppet::Type.type(:package).provide :pip2,
+  :parent => :pip do
+
+  desc "Python packages via `pip2`.
+
+  This provider supports the `install_options` attribute, which allows command-line flags to be passed to pip2.
+  These options should be specified as a string (e.g. '--flag'), a hash (e.g. {'--flag' => 'value'}),
+  or an array where each element is either a string or a hash."
+
+  has_feature :installable, :uninstallable, :upgradeable, :versionable, :install_options
+
+  def self.cmd
+    ["pip2"]
+  end
+end
diff --git a/modules/base_installation/manifests/package_managers.pp b/modules/base_installation/manifests/package_managers.pp

index c5c848524f5ad63695cbd7334b3bbc528e5b2878..f4df1860b1b9c71cf57e9a66634e3a2fb6ad7aea 100644 (file)
--- a/modules/base_installation/manifests/package_managers.pp
+++ b/modules/base_installation/manifests/package_managers.pp
@@ -15,9 +15,22 @@ class base_installation::package_managers inherits base_installation {
  
    pacman::repo { 'multilib':
      order   => 15,
-    include => '/etc/pacman.d/mirrorlist'
+    include => '/etc/pacman.d/mirrorlist',
    }
  
+  pacman::repo { 'immae':
+    order    => 0,
+    server   => 'https://git.immae.eu/releases/packages/',
+    siglevel => 'Optional',
+  }
+
+  exec { "refresh pacman":
+    command     => "/usr/bin/pacman -Sy",
+    refreshonly => true,
+  }
+
+  Concat["/etc/pacman.conf"] ~> Exec["refresh pacman"] -> Package <| name != "pacman" |>
+
    class { 'aur': }
  
    contain "pacman"
diff --git a/modules/profile/files/monitoring/check_command b/modules/profile/files/monitoring/check_command

new file mode 100644 (file)

index 0000000..2c7eded
--- /dev/null
+++ b/modules/profile/files/monitoring/check_command
@@ -0,0 +1,113 @@
+#!/usr/bin/perl
+
+use strict;
+use Getopt::Std;
+$| = 1;
+
+my %opts;
+getopts('hr:C:c:s:o:', \%opts);
+
+my $STATE_OK = 0;
+my $STATE_WARNING = 1;
+my $STATE_CRITICAL = 2;
+my $STATE_UNKNOWN = 3;
+
+if ($opts{'h'} || scalar(%opts) == 0) {
+  &print_help();
+  exit($STATE_OK);
+}
+
+my $command = $opts{'c'};
+if ($command eq '') {
+  print "You must provide a command to check.\n";
+  exit($STATE_UNKNOWN);
+}
+
+my $expected_output = $opts{'o'};
+my $expected_status = $opts{'s'};
+my $other_command   = $opts{'C'};
+
+if ($other_command eq '' and $expected_status eq '' and $expected_output eq '') {
+  $expected_status = 0;
+}
+
+my $cmd = $command . ' 2>&1';
+my $other_cmd;
+if ($other_command ne '') {
+  $other_cmd = $other_command . ' 2>&1';
+}
+
+my $run_as;
+if ($opts{'r'}) {
+  $run_as = $opts{'r'};
+  $cmd = "sudo -u $run_as -n $cmd";
+
+  if ($other_command ne '') {
+    $other_cmd = "sudo -u $run_as -n $other_cmd";
+  }
+
+}
+
+chomp(my $cmd_result = `$cmd`);
+# $? is the raw wait status: keep the real exit code now, before the other command overwrites it (-1: no normal exit)
+my $cmd_status = ($? == -1 || ($? & 127)) ? -1 : ($? >> 8);
+my $other_cmd_result;
+if ($other_command ne '') {
+  $other_cmd_result = `$other_cmd`;
+  chomp($other_cmd_result);
+}
+
+if ($cmd_result =~ /sudo/i) {
+  print "$command UNKNOWN - No sudo right to run the command\n";
+  exit($STATE_UNKNOWN);
+} elsif ($expected_status ne '') {
+  if ($cmd_status != $expected_status) {
+    print "$command CRITICAL - Response status $cmd_status\n";
+    exit($STATE_CRITICAL);
+  } else {
+    print "$command OK - Response status $cmd_status\n";
+    exit($STATE_OK);
+  }
+} elsif ($other_command ne '') {
+  if ($cmd_result ne $other_cmd_result) {
+    print "$command CRITICAL - Expected output not matching other command output\n";
+    exit($STATE_CRITICAL);
+  } else {
+    print "$command OK - Expected output matching other command output\n";
+    exit($STATE_OK);
+  }
+} else {
+  if ($cmd_result !~ /$expected_output/) {
+    print "$command CRITICAL - Expected output not matching\n";
+    exit($STATE_CRITICAL);
+  } else {
+    print "$command OK - Expected output matching\n";
+    exit($STATE_OK);
+  }
+}
+
+sub print_help() {
+  print << "EOF";
+Check whether the given command responds as expected. One of -o -C or -s must be selected.
+
+Options:
+-h
+    Print detailed help screen
+
+-c
+    command to run (required)
+
+-C
+    other command to compare output
+
+-r user
+    Run as user via sudo.
+
+-s
+    status code to check
+
+-o
+    output to check
+
+EOF
+}
+
diff --git a/modules/profile/files/monitoring/check_last_file_date b/modules/profile/files/monitoring/check_last_file_date

new file mode 100644 (file)

index 0000000..8eabb57
--- /dev/null
+++ b/modules/profile/files/monitoring/check_last_file_date
@@ -0,0 +1,31 @@
+#!/bin/bash
+ 
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+  
+base_path=$1
+hours=$2
+as_user=$3
+
+if [ -z "$as_user" ]; then
+  last_date=$(find $base_path -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort | tail -n 1)
+else
+  last_date=$(sudo -u "$as_user" find $base_path -mindepth 1 -maxdepth 1 -printf "%T@\n" 2>/dev/null | sort | tail -n 1)
+fi
+
+if [ -z "$last_date" ]; then
+  echo "UNKNOWN: Could not read folder"
+  exit $STATE_UNKNOWN
+else
+  LC_ALL=C last_date=$(printf "%.*f" 0 $last_date)
+  min_date=$(date -d "$hours hours ago" "+%s")
+  if [ "$min_date" -lt "$last_date" ]; then
+    echo "OK: Last backup $(date -d @$last_date)"
+    exit $STATE_OK
+  else
+    echo "CRITICAL: Last backup $(date -d @$last_date)"
+    exit $STATE_CRITICAL
+  fi
+fi
diff --git a/modules/profile/files/monitoring/check_md_raid b/modules/profile/files/monitoring/check_md_raid

new file mode 100644 (file)

index 0000000..9c79a7a
--- /dev/null
+++ b/modules/profile/files/monitoring/check_md_raid
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Created by Sebastian Grewe, Jammicron Technology
+#
+
+# Get count of raid arrays
+RAID_DEVICES=`grep ^md -c /proc/mdstat`
+
+# Get count of degraded arrays
+RAID_STATUS=`grep "\[.*_.*\]" /proc/mdstat -c`
+
+# Is an array currently recovering, get percentage of recovery
+RAID_RECOVER=`grep recovery /proc/mdstat | awk '{print $4}'`
+
+# Check raid status
+# RAID recovers --> Warning
+if [[ $RAID_RECOVER ]]; then
+       STATUS="WARNING - Checked $RAID_DEVICES arrays, recovering : $RAID_RECOVER"
+       EXIT=1
+# RAID ok
+elif [[ $RAID_STATUS  == "0" ]]; then
+       STATUS="OK - Checked $RAID_DEVICES arrays."
+       EXIT=0
+# All else critical, better save than sorry
+else
+       STATUS="CRITICAL - Checked $RAID_DEVICES arrays, $RAID_STATUS have FAILED"
+       EXIT=2
+fi
+
+# Status and quit
+echo $STATUS
+exit $EXIT
diff --git a/modules/profile/files/monitoring/check_postgres_replication b/modules/profile/files/monitoring/check_postgres_replication

new file mode 100644 (file)

index 0000000..a550077
--- /dev/null
+++ b/modules/profile/files/monitoring/check_postgres_replication
@@ -0,0 +1,35 @@
+#!/bin/bash
+ 
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+  
+user=$1
+host=$2
+port=$3
+
+lag=$(psql -h $host -p $port -A -t -c "SELECT COALESCE(EXTRACT(EPOCH FROM replay_lag),0) FROM pg_stat_replication WHERE usename='$user'" 2>/dev/null)
+exit_code=$?
+
+if [[ $exit_code -ne 0 ]]; then
+  echo "UNKNOWN - Impossible to run psql command"
+  exit $STATE_UNKNOWN
+elif [[ -z "$lag" ]]; then
+  echo "UNKNOWN - No replication found for $user"
+  exit $STATE_UNKNOWN
+else
+  output="Replication lag for $user is ${lag}s"
+  LC_ALL=C lag=$(printf "%.*f" 0 $lag)
+
+  if [[ $lag -lt 5 ]]; then
+    echo "OK - $output"
+    exit $STATE_OK
+  elif [[ $lag -lt 10 ]]; then
+    echo "WARNING - $output"
+    exit $STATE_WARNING
+  else
+    echo "CRITICAL - $output"
+    exit $STATE_CRITICAL
+  fi
+fi
diff --git a/modules/profile/manifests/fstab.pp b/modules/profile/manifests/fstab.pp

index 8ecfe723dc016685464d3db6a4d321bb7f26016a..3af316e2808bf23afeac853ae2f69d44d1089120 100644 (file)
--- a/modules/profile/manifests/fstab.pp
+++ b/modules/profile/manifests/fstab.pp
@@ -15,6 +15,12 @@ class profile::fstab (
          device => "UUID=${infos[1]}",
          fstype => $infos[2]
        }
+
+      @profile::monitoring::local_service { "Size on ${infos[0]} partition":
+        local => {
+          check_command => "check_local_disk!10%!5%!${infos[0]}",
+        };
+      }
      }
    }
  }
diff --git a/modules/profile/manifests/monitoring.pp b/modules/profile/manifests/monitoring.pp

new file mode 100644 (file)

index 0000000..8633626
--- /dev/null
+++ b/modules/profile/manifests/monitoring.pp
@@ -0,0 +1,58 @@
+class profile::monitoring (
+  Optional[String] $naemon_url   = undef,
+  Optional[String] $naemon_token = undef,
+) inherits profile::monitoring::params {
+  ensure_packages(["naemon", "cnagios"])
+
+  file { "/etc/naemon":
+    ensure  => "directory",
+    recurse => true,
+    purge   => true,
+    force   => true,
+    require => Package["naemon"],
+  }
+  ->
+  file { "/etc/naemon/resource.cfg":
+    ensure  => "file",
+    owner   => "naemon",
+    group   => "naemon",
+    mode    => "0600",
+    content => template("profile/monitoring/resource.cfg.erb"),
+  }
+  ->
+  file { "/etc/naemon/naemon.cfg":
+    ensure  => "file",
+    owner   => "naemon",
+    group   => "naemon",
+    mode    => "0644",
+    content => template("profile/monitoring/naemon.cfg.erb"),
+  }
+  ->
+  file { $objects:
+    ensure => "file",
+    owner  => "naemon",
+    group  => "naemon",
+    mode   => "0600"
+  }
+  ->
+  service { "naemon":
+    ensure => "running",
+    enable => true,
+  }
+
+  unless ($naemon_url == undef or empty($naemon_url)) {
+    file { "/etc/naemon/send_nrdp.sh":
+      ensure  => "file",
+      owner   => "naemon",
+      group   => "naemon",
+      mode    => "0700",
+      content => template("profile/monitoring/send_nrdp.sh.erb"),
+    }
+  }
+
+  include "profile::monitoring::hosts"
+  include "profile::monitoring::services"
+  include "profile::monitoring::commands"
+  include "profile::monitoring::times"
+  include "profile::monitoring::contacts"
+}
diff --git a/modules/profile/manifests/monitoring/commands.pp b/modules/profile/manifests/monitoring/commands.pp

new file mode 100644 (file)

index 0000000..1c8d0b4
--- /dev/null
+++ b/modules/profile/manifests/monitoring/commands.pp
@@ -0,0 +1,70 @@
+class profile::monitoring::commands inherits profile::monitoring {
+  ensure_packages(["monitoring-plugins"])
+
+  file { $plugins:
+    ensure => "directory",
+    owner  => "root",
+    group  => "naemon",
+    mode   => "0755",
+  }
+
+  [
+    "check_command",
+    "check_md_raid",
+    "check_postgres_replication",
+    "check_last_file_date",
+  ].each |$file| {
+    file { "$plugins/$file":
+      ensure => "present",
+      owner  => "root",
+      group  => "naemon",
+      mode   => "0755",
+      source => "puppet:///modules/profile/monitoring/$file",
+    }
+  }
+
+  Nagios_command {
+    ensure => "present",
+    owner  => "naemon",
+    group  => "naemon",
+    target => $objects,
+    notify => Service["naemon"],
+    before => Service["naemon"],
+    require => File["/etc/naemon"],
+  }
+
+  nagios_command {
+    "check-host-alive":
+      command_line => '$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5';
+    "check_local_disk":
+      command_line => '$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$';
+    "check_local_procs":
+      command_line => '$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$';
+    "check_local_load":
+      command_line => '$USER1$/check_load -w $ARG1$ -c $ARG2$';
+    "check_local_swap":
+      command_line => '$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$';
+    "check_ntp":
+      command_line => '$USER1$/check_ntp_time -H 0.arch.pool.ntp.org';
+    "check_md_raid":
+      command_line => '$USER2$/check_md_raid',
+      require      => File["$plugins/check_md_raid"];
+    "check_command_output":
+      command_line => '$USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$',
+      require      => File["$plugins/check_command"];
+    "check_postgresql_replication":
+      command_line => '/usr/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$"',
+      require      => File["$plugins/check_postgres_replication"];
+    "check_last_file_date":
+      command_line => '$USER2$/check_last_file_date "$ARG1$" "$ARG2$" "$ARG3$"',
+      require      => File["$plugins/check_last_file_date"],
+  }
+
+  unless empty($naemon_url) {
+    nagios_command {
+      "notify-master":
+        command_line => '/etc/naemon/send_nrdp.sh -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$"',
+        require      => File["/etc/naemon/send_nrdp.sh"];
+    }
+  }
+}
diff --git a/modules/profile/manifests/monitoring/contacts.pp b/modules/profile/manifests/monitoring/contacts.pp

new file mode 100644 (file)

index 0000000..a751153
--- /dev/null
+++ b/modules/profile/manifests/monitoring/contacts.pp
@@ -0,0 +1,16 @@
+class profile::monitoring::contacts inherits profile::monitoring::params {
+  Nagios_contactgroup {
+    ensure  => "present",
+    owner   => "naemon",
+    group   => "naemon",
+    target  => $objects,
+    notify  => Service["naemon"],
+    before  => Service["naemon"],
+    require => File["/etc/naemon"],
+  }
+
+  nagios_contactgroup { "admins":
+    alias => "System administrators",    
+  }
+
+}
diff --git a/modules/profile/manifests/monitoring/external_service.pp b/modules/profile/manifests/monitoring/external_service.pp

new file mode 100644 (file)

index 0000000..027dad8
--- /dev/null
+++ b/modules/profile/manifests/monitoring/external_service.pp
@@ -0,0 +1,16 @@
+define profile::monitoring::external_service (
+  Optional[String] $type   = undef,
+  Optional[Hash]   $master = {},
+) {
+  include profile::monitoring::params
+  $service_description = $title
+
+  nagios_service { $service_description:
+    service_description => $service_description,
+    host_name           => $::profile::monitoring::params::service_local["host_name"],
+    use                 => $::profile::monitoring::params::service_types[$type],
+    target              => $::profile::monitoring::params::services_for_master,
+    *                   => $master,
+  }
+
+}
diff --git a/modules/profile/manifests/monitoring/hosts.pp b/modules/profile/manifests/monitoring/hosts.pp

new file mode 100644 (file)

index 0000000..f7802be
--- /dev/null
+++ b/modules/profile/manifests/monitoring/hosts.pp
@@ -0,0 +1,45 @@
+class profile::monitoring::hosts inherits profile::monitoring::params {
+  $real_hostname = lookup("base_installation::real_hostname")
+
+  Nagios_hostgroup {
+    ensure  => "present",
+    owner   => "naemon",
+    group   => "naemon",
+    target  => $objects,
+    notify  => Service["naemon"],
+    before  => Service["naemon"],
+    require => File["/etc/naemon"],
+  }
+
+  Nagios_host {
+    ensure  => "present",
+    owner   => "naemon",
+    group   => "naemon",
+    target  => $objects,
+    notify  => Service["naemon"],
+    before  => Service["naemon"],
+    require => File["/etc/naemon"],
+  }
+
+  nagios_hostgroup { "linux-servers":
+    alias   => "Linux Servers",
+    members => [$real_hostname],
+  }
+
+  $host_linux_server = {
+    check_command         => "check-host-alive",
+    check_interval        => 5,
+    check_period          => "24x7",
+    contact_groups        => "admins",
+    max_check_attempts    => "10",
+    notification_interval => "120",
+    notification_options  => "d,u,r",
+    retry_interval        => "1",
+  }
+
+  nagios_host { $real_hostname:
+    address => $real_hostname;
+  default: * => $host_linux_server,
+  }
+
+}
diff --git a/modules/profile/manifests/monitoring/local_service.pp b/modules/profile/manifests/monitoring/local_service.pp

new file mode 100644 (file)

index 0000000..1f975f0
--- /dev/null
+++ b/modules/profile/manifests/monitoring/local_service.pp
@@ -0,0 +1,56 @@
+define profile::monitoring::local_service (
+  Optional[Hash] $sudos  = {},
+  Optional[Hash] $common = {},
+  Optional[Hash] $master = {},
+  Optional[Hash] $local  = {},
+) {
+  include profile::monitoring::params
+
+  $service_description = $title
+
+  Nagios_service {
+    ensure  => "present",
+    owner   => "naemon",
+    group   => "naemon",
+    notify  => Service["naemon"],
+    before  => Service["naemon"],
+    require => File["/etc/naemon"],
+  }
+
+  $sudos.each |$sudo_name, $content| {
+    ensure_resource("sudo::conf", $sudo_name, {
+      content => $content,
+      before  => Nagios_service[$service_description],
+    })
+  }
+
+  [true, false].each |$services_for_master| {
+    if $services_for_master {
+      $default_local = {
+        mode           => "0644",
+        target         => $::profile::monitoring::params::services_for_master,
+        check_interval => $::profile::monitoring::params::service_local["check_interval"],
+        retry_interval => $::profile::monitoring::params::service_local["retry_interval"],
+        host_name      => $::profile::monitoring::params::service_local["host_name"],
+        use            => $::profile::monitoring::params::service_types["passive"],
+        notify         => [],
+      }
+      $t = "master - "
+      $services_key = $master
+    } else {
+      $default_local = merge(
+        $::profile::monitoring::params::service_local,
+        { target => $::profile::monitoring::params::objects }
+      )
+      $t = ""
+      $services_key = $local
+    }
+
+    $hash = merge($default_local, $common, $services_key)
+
+    nagios_service { "$t$service_description":
+      service_description => $service_description,
+      *                   => $hash
+    }
+  }
+}
diff --git a/modules/profile/manifests/monitoring/params.pp b/modules/profile/manifests/monitoring/params.pp

new file mode 100644 (file)

index 0000000..27f895e
--- /dev/null
+++ b/modules/profile/manifests/monitoring/params.pp
@@ -0,0 +1,42 @@
+class profile::monitoring::params {
+  $real_hostname = lookup("base_installation::real_hostname")
+
+  $services_for_master = "/etc/naemon/services_for_master.cfg"
+  $objects             = "/etc/naemon/objects.cfg"
+  $plugins             = "/etc/naemon/monitoring-plugins"
+
+  $service_types = {
+    "passive" => "external-passive-service",
+    "web"     => "external-web-service",
+  }
+
+  $service_generic = {
+    active_checks_enabled        => "1",
+    check_freshness              => "0",
+    check_interval               => "10",
+    check_period                 => "24x7",
+    contact_groups               => "admins",
+    event_handler_enabled        => "1",
+    flap_detection_enabled       => "1",
+    is_volatile                  => "0",
+    max_check_attempts           => "3",
+    notification_interval        => "60",
+    notification_options         => "w,u,c,r",
+    notification_period          => "24x7",
+    notifications_enabled        => "0",
+    obsess_over_service          => "1",
+    passive_checks_enabled       => "1",
+    process_perf_data            => "1",
+    retain_nonstatus_information => "1",
+    retain_status_information    => "1",
+    retry_interval               => "2",
+  }
+
+  $service_local = merge($service_generic, {
+    host_name          => $real_hostname,
+    check_interval     => "5",
+    max_check_attempts => "4",
+    retry_interval     => "1",
+    })
+
+}
diff --git a/modules/profile/manifests/monitoring/services.pp b/modules/profile/manifests/monitoring/services.pp

new file mode 100644 (file)

index 0000000..95c6efb
--- /dev/null
+++ b/modules/profile/manifests/monitoring/services.pp
@@ -0,0 +1,42 @@
+class profile::monitoring::services {
+
+  profile::monitoring::local_service {
+    "Size on root partition":
+      local => {
+        check_command => "check_local_disk!20%!10%!/",
+      };
+    "Total number of process":
+      local => {
+        check_command => "check_local_procs!50!100!RSZDT",
+      };
+    "Average load":
+      local => {
+        check_command => "check_local_load!8.0,8.0,8.0!10.0,10.0,10.0",
+      };
+    "Swap usage":
+      local => {
+        check_command => "check_local_swap!20!10",
+      };
+    "fail2ban is active":
+      sudos   => {
+        "naemon-fail2ban" => "naemon  ALL=(root) NOPASSWD: /usr/bin/fail2ban-client ping",
+      },
+      local   => {
+        check_command => "check_command_output!fail2ban-client ping!pong!-r root",
+      };
+    "NTP is activated and working":
+      local => {
+        check_command => "check_ntp",
+      };
+    "No mdadm array is degraded":
+      common => {
+        ensure => (find_file("/proc/mdstat") == undef) ? { true => "absent", default =>"present" },
+      },
+      local => {
+        check_command => "check_md_raid",
+      };
+  }
+
+  Profile::Monitoring::Local_service <| |>
+  Profile::Monitoring::External_service <| |>
+}
diff --git a/modules/profile/manifests/monitoring/times.pp b/modules/profile/manifests/monitoring/times.pp

new file mode 100644 (file)

index 0000000..42f5d9c
--- /dev/null
+++ b/modules/profile/manifests/monitoring/times.pp
@@ -0,0 +1,23 @@
+class profile::monitoring::times inherits profile::monitoring::params {
+  Nagios_timeperiod {
+    ensure  => "present",
+    owner   => "naemon",
+    group   => "naemon",
+    target  => $objects,
+    notify  => Service["naemon"],
+    before  => Service["naemon"],
+    require => File["/etc/naemon"],
+  }
+
+  nagios_timeperiod { "24x7":
+    alias     => "24 Hours A Day, 7 Days A Week",
+    monday    => "00:00-24:00",
+    tuesday   => "00:00-24:00",
+    wednesday => "00:00-24:00",
+    thursday  => "00:00-24:00",
+    friday    => "00:00-24:00",
+    saturday  => "00:00-24:00",
+    sunday    => "00:00-24:00",
+  }
+
+}
diff --git a/modules/profile/manifests/postgresql.pp b/modules/profile/manifests/postgresql.pp

index 97ce57291b6bbf4abf9de7c2e04932b5dbe04435..fedbcb1ef7574f5675690a3b83bf25d01177873d 100644 (file)
--- a/modules/profile/manifests/postgresql.pp
+++ b/modules/profile/manifests/postgresql.pp
@@ -28,5 +28,13 @@ class profile::postgresql (
  
    profile::postgresql::base_pg_hba_rules { "default": }
  
+  @profile::monitoring::local_service { "Databases are present in postgresql":
+    sudos => {
+      "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\\ nspname\\ from\\ pg_catalog.pg_namespace"
+    },
+    local => {
+      check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace'!public!-r postgres",
+    }
+  }
  }
  
diff --git a/modules/profile/manifests/postgresql/backup_dump.pp b/modules/profile/manifests/postgresql/backup_dump.pp

index 53fb20ee0b3b2336ac7e3fd241252ceb09f44833..e247cf027e64535e804060bcbcb5c9668179cb80 100644 (file)
--- a/modules/profile/manifests/postgresql/backup_dump.pp
+++ b/modules/profile/manifests/postgresql/backup_dump.pp
@@ -57,4 +57,13 @@ define profile::postgresql::backup_dump (
        },
      ]
    }
+
+  @profile::monitoring::local_service { "Last postgresql dump in $pg_backup_path is not too old":
+    sudos => {
+      "naemon-postgresql-dumps-$pg_host" => "naemon  ALL=($pg_user) NOPASSWD: /usr/bin/find $pg_backup_path -mindepth 1 -maxdepth 1 -printf %T@?n",
+    },
+    local => {
+      check_command => "check_last_file_date!$pg_backup_path!7!$pg_user",
+    }
+  }
  }
diff --git a/modules/profile/manifests/postgresql/backup_pgbouncer.pp b/modules/profile/manifests/postgresql/backup_pgbouncer.pp

index 45b8ed5a5528a89cfde528dda73bc421441e4b02..5fd7861e52c304ad6988df80f35d816d5d25fde3 100644 (file)
--- a/modules/profile/manifests/postgresql/backup_pgbouncer.pp
+++ b/modules/profile/manifests/postgresql/backup_pgbouncer.pp
@@ -48,6 +48,16 @@ define profile::postgresql::backup_pgbouncer (
      content => "${pg_infos[pgbouncer_dbname]} = host=$pg_path$pg_port user=${pg_infos[dbuser]} dbname=${pg_infos[dbname]}",
    }
  
+  # Current pam configuration requires password for postgres
+  # @profile::monitoring::local_service { "Database ${pg_infos[pgbouncer_dbname]} is available in pgbouncer":
+  #   sudos => {
+  #     "naemon-postgresql-database-public" => "naemon ALL=(postgres) NOPASSWD: /usr/bin/psql -c select\ nspname\ from\ pg_catalog.pg_namespace ${pg_infos[pgbouncer_dbname]}"
+  #   },
+  #   local => {
+  #     check_command => "check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace' ${pg_infos[pgbouncer_dbname]}!public!-r postgres",
+  #   }
+  # }
+
    # pg_hba for accessed cluster
    postgresql::server::pg_hba_rule { "$pg_backup_host - local access as ${pg_infos[dbuser]} user":
      description => "Allow local access to ${pg_infos[dbuser]} user",
diff --git a/modules/profile/manifests/postgresql/master.pp b/modules/profile/manifests/postgresql/master.pp

index 02315a615d7e771edaba1e6828582099bd387142..99ac4c45fe3fc7cefa1e8ad8bbf31cffed0b482c 100644 (file)
--- a/modules/profile/manifests/postgresql/master.pp
+++ b/modules/profile/manifests/postgresql/master.pp
@@ -59,5 +59,15 @@ define profile::postgresql::master (
        handle_slot   => true,
        add_self_role => true,
      }
+
+    @profile::monitoring::local_service { "Postgresql replication for $backup_host is up to date":
+      sudos => {
+        "naemon-postgresql-replication-$backup_host" => "naemon ALL=(postgres) NOPASSWD: /etc/naemon/monitoring-plugins/check_postgres_replication $backup_host /run/postgresql 5432"
+
+      },
+      local => {
+        check_command => "check_postgresql_replication!$backup_host!/run/postgresql!5432",
+      }
+    }
    }
  }
diff --git a/modules/profile/manifests/postgresql/ssl.pp b/modules/profile/manifests/postgresql/ssl.pp

index dc56c0bd61e77cfba9f4c7d294988167d9472bce..b809a9d8ee0ff2b30991eceb51497c96163c38ad 100644 (file)
--- a/modules/profile/manifests/postgresql/ssl.pp
+++ b/modules/profile/manifests/postgresql/ssl.pp
@@ -78,5 +78,4 @@ define profile::postgresql::ssl (
        content => "ssl = on\nssl_key_file = '$ssl_key'\nssl_cert_file = '$ssl_cert'\n"
      }
    }
-
  }
diff --git a/modules/profile/templates/monitoring/naemon.cfg.erb b/modules/profile/templates/monitoring/naemon.cfg.erb

new file mode 100644 (file)

index 0000000..bacbe04
--- /dev/null
+++ b/modules/profile/templates/monitoring/naemon.cfg.erb
@@ -0,0 +1,1038 @@
+##############################################################################
+#
+# naemon.cfg - Sample Main Config File for Naemon 1.0.7
+#
+# Read the documentation for more information on this configuration
+# file.  I've provided some comments here, but things may not be so
+# clear without further explanation.
+#
+#
+##############################################################################
+
+
+# LOG FILE
+# This is the main log file where service and host events are logged
+# for historical purposes.  This should be the first option specified
+# in the config file!!!
+
+log_file=/var/log/naemon/naemon.log
+
+
+
+# OBJECT CONFIGURATION FILE(S)
+# These are the object configuration files in which you define hosts,
+# host groups, contacts, contact groups, services, etc.
+# You can split your object definitions across several config files
+# if you wish (as shown below), or keep them all in a single config file.
+
+# You can specify individual object config files as shown below:
+cfg_file=<%= @objects %>
+#cfg_file=/etc/naemon/objects/commands.cfg
+#cfg_file=/etc/naemon/objects/contacts.cfg
+#cfg_file=/etc/naemon/objects/timeperiods.cfg
+#cfg_file=/etc/naemon/objects/templates.cfg
+
+
+# You can also tell naemon to process all config files (with a .cfg
+# extension) in a particular directory by using the cfg_dir
+# directive as shown below:
+#cfg_dir=/etc/naemon/conf.d
+
+
+
+
+# OBJECT CACHE FILE
+# This option determines where object definitions are cached when
+# naemon starts/restarts.  The CGIs read object definitions from
+# this cache file (rather than looking at the object config files
+# directly) in order to prevent inconsistencies that can occur
+# when the config files are modified after naemon starts.
+
+object_cache_file=/var/lib/naemon/objects.cache
+
+
+
+# PRE-CACHED OBJECT FILE
+# This option determines the location of the precached object file.
+# If you run naemon with the -p command line option, it will preprocess
+# your object configuration file(s) and write the cached config to this
+# file.  You can then start naemon with the -u option to have it read
+# object definitions from this precached file, rather than the standard
+# object configuration files (see the cfg_file and cfg_dir options above).
+# Using a precached object file can speed up the time needed to (re)start
+# the naemon process if you've got a large and/or complex configuration.
+# Read the documentation section on optimizing naemon to find out more
+# about how this feature works.
+
+precached_object_file=/var/lib/naemon/objects.precache
+
+
+
+# RESOURCE FILE
+# This is an optional resource file that contains $USERx$ macro
+# definitions. Multiple resource files can be specified by using
+# multiple resource_file definitions.  The CGIs will not attempt to
+# read the contents of resource files, so information that is
+# considered to be sensitive (usernames, passwords, etc) can be
+# defined as macros in this file and restrictive permissions (600)
+# can be placed on this file.
+
+resource_file=/etc/naemon/resource.cfg
+
+
+
+# STATUS FILE
+# This is where the current status of all monitored services and
+# hosts is stored.  Its contents are read and processed by the CGIs.
+# The contents of the status file are deleted every time naemon
+#  restarts.
+
+status_file=/var/lib/naemon/status.dat
+
+
+
+# STATUS FILE UPDATE INTERVAL
+# This option determines the frequency (in seconds) that
+# naemon will periodically dump program, host, and
+# service status data. Set it to 0 to disable updates.
+
+status_update_interval=10
+
+
+
+# EXTERNAL COMMAND OPTION
+# This option allows you to specify whether or not Naemon should check
+# for external commands (in the command file defined below).  By default
+# Naemon will *not* check for external commands, just to be on the
+# cautious side.  If you want to be able to use the CGI command interface
+# you will have to enable this.
+# Values: 0 = disable commands, 1 = enable commands
+
+check_external_commands=1
+
+
+
+# EXTERNAL COMMAND FILE
+# This is the file that Naemon checks for external command requests.
+# It is also where the command CGI will write commands that are submitted
+# by users, so it must be writeable by the user that the web server
+# is running as (usually 'nobody').  Permissions should be set at the
+# directory level instead of on the file, as the file is deleted every
+# time its contents are processed.
+
+command_file=/var/lib/naemon/naemon.cmd
+
+
+
+# QUERY HANDLER INTERFACE
+# This is the socket that is created for the Query Handler interface
+
+#query_socket=/var/lib/naemon/naemon.qh
+
+
+
+# LOCK FILE
+# This is the lockfile that Naemon will use to store its PID number
+# in when it is running in daemon mode.
+
+lock_file=/run/naemon/naemon.pid
+
+
+
+# TEMP FILE
+# This is a temporary file that is used as scratch space when Naemon
+# updates the status log, cleans the comment file, etc.  This file
+# is created, used, and deleted throughout the time that Naemon is
+# running.
+
+temp_file=/var/lib/naemon/naemon.tmp
+
+
+
+# TEMP PATH
+# This is the path where Naemon can create temp files for service and
+# host check results, etc.
+
+temp_path=/var/cache/naemon
+
+
+
+# EVENT BROKER OPTIONS
+# Controls what (if any) data gets sent to the event broker.
+# Values:  0      = Broker nothing
+#         -1      = Broker everything
+#         <other> = See documentation
+
+event_broker_options=-1
+
+
+
+# EVENT BROKER MODULE(S)
+# This directive is used to specify an event broker module that should
+# be loaded by Naemon at startup.  Use multiple directives if you want
+# to load more than one module.  Arguments that should be passed to
+# the module at startup are separated from the module path by a space.
+#
+# Example:
+#
+#   broker_module=<modulepath> [moduleargs]
+
+#broker_module=/usr/lib/naemon/naemon-livestatus/livestatus.so /var/cache/naemon/live
+#broker_module=/somewhere/module1.o
+#broker_module=/somewhere/module2.o arg1 arg2=3 debug=0
+
+# In order to provide drop-in support for new modules, you can also make use of
+# the include_dir directive. The include_dir directive causes Naemon to parse
+# any configuration (not just object configuration, as with cfg_dir) as if the
+# contents of the files in the pointed-to directory were included on this line.
+# The path to the directory is relative to the path of the main naemon.cfg
+# file.
+# include_dir=module-conf.d
+
+# LOG ARCHIVE PATH
+# This is the directory where archived (rotated) log files are placed by the
+# logrotate daemon. It is used by out of core add-ons to discover the logfiles.
+
+log_archive_path=/var/log/naemon/archives
+
+
+
+# LOGGING OPTIONS
+# If you want messages logged to the syslog facility, as well as the
+# Naemon log file, set this option to 1.  If not, set it to 0.
+
+use_syslog=1
+
+# NOTIFICATION LOGGING OPTION
+# If you don't want notifications to be logged, set this value to 0.
+# If notifications should be logged, set the value to 1.
+
+log_notifications=1
+
+# Notification suppression reason (NSR) logging causes the reason for a
+# notification suppression to be logged, when it occurs. This can potentially
+# add some noise to your log file, but is highly useful when troubleshooting
+# missing notifications.
+
+enable_notification_suppression_reason_logging=1
+
+
+# SERVICE RETRY LOGGING OPTION
+# If you don't want service check retries to be logged, set this value
+# to 0.  If retries should be logged, set the value to 1.
+
+log_service_retries=1
+
+
+
+# HOST RETRY LOGGING OPTION
+# If you don't want host check retries to be logged, set this value to
+# 0.  If retries should be logged, set the value to 1.
+
+log_host_retries=1
+
+
+
+# EVENT HANDLER LOGGING OPTION
+# If you don't want host and service event handlers to be logged, set
+# this value to 0.  If event handlers should be logged, set the value
+# to 1.
+
+log_event_handlers=1
+
+
+
+# INITIAL STATES LOGGING OPTION
+# If you want Naemon to log all initial host and service states to
+# the main log file (the first time the service or host is checked)
+# you can enable this option by setting this value to 1.  If you
+# are not using an external application that does long term state
+# statistics reporting, you do not need to enable this option.  In
+# this case, set the value to 0.
+
+log_initial_states=1
+
+
+
+# CURRENT STATES LOGGING OPTION
+# If you don't want Naemon to log all current host and service states
+# to the main log file after the log has been rotated, you can disable this
+# option by setting this value to 0. Default value is 1.
+
+log_current_states=1
+
+
+
+# EXTERNAL COMMANDS LOGGING OPTION
+# If you don't want Naemon to log external commands, set this value
+# to 0.  If external commands should be logged, set this value to 1.
+# Note: This option does not include logging of passive service
+# checks - see the option below for controlling whether or not
+# passive checks are logged.
+
+log_external_commands=1
+
+
+
+# PASSIVE CHECKS LOGGING OPTION
+# If you don't want Naemon to log passive host and service checks, set
+# this value to 0.  If passive checks should be logged, set
+# this value to 1.
+
+log_passive_checks=1
+
+
+
+# GLOBAL HOST AND SERVICE EVENT HANDLERS
+# These options allow you to specify a host and service event handler
+# command that is to be run for every host or service state change.
+# The global event handler is executed immediately prior to the event
+# handler that you have optionally specified in each host or
+# service definition. The command argument is the short name of a
+# command definition that you define in your host configuration file.
+# Read the HTML docs for more information.
+
+#global_host_event_handler=somecommand
+#global_service_event_handler=somecommand
+
+
+
+# MAXIMUM CONCURRENT SERVICE CHECKS
+# This option allows you to specify the maximum number of
+# service checks that can be run in parallel at any given time.
+# Specifying a value of 1 for this variable essentially prevents
+# any service checks from being parallelized.  A value of 0
+# will not restrict the number of concurrent checks that are
+# being executed.
+
+max_concurrent_checks=0
+
+
+# CHECK RESULT PATH
+# This is the directory where Naemon reads check results of host and
+# service checks to further process them.
+#
+# Note: Naemon does not require this folder internally but it still
+# can be used to pass check results to Naemon.
+
+check_result_path=/var/cache/naemon/checkresults
+
+
+# CACHED HOST CHECK HORIZON
+# This option determines the maximum amount of time (in seconds)
+# that the state of a previous host check is considered current.
+# Cached host states (from host checks that were performed more
+# recently than the timeframe specified by this value) can immensely
+# improve performance in regards to the host check logic.
+# Too high of a value for this option may result in inaccurate host
+# states being used by Naemon, while a lower value may result in a
+# performance hit for host checks.  Use a value of 0 to disable host
+# check caching.
+
+cached_host_check_horizon=15
+
+
+
+# CACHED SERVICE CHECK HORIZON
+# This option determines the maximum amount of time (in seconds)
+# that the state of a previous service check is considered current.
+# Cached service states (from service checks that were performed more
+# recently than the timeframe specified by this value) can immensely
+# improve performance in regards to predictive dependency checks.
+# Use a value of 0 to disable service check caching.
+
+cached_service_check_horizon=15
+
+
+
+# ENABLE PREDICTIVE HOST DEPENDENCY CHECKS
+# This option determines whether or not Naemon will attempt to execute
+# checks of hosts when it predicts that future dependency logic tests
+# may be needed.  These predictive checks can help ensure that your
+# host dependency logic works well.
+# Values:
+#  0 = Disable predictive checks
+#  1 = Enable predictive checks (default)
+
+enable_predictive_host_dependency_checks=1
+
+
+
+# ENABLE PREDICTIVE SERVICE DEPENDENCY CHECKS
+# This option determines whether or not Naemon will attempt to execute
+# checks of services when it predicts that future dependency logic tests
+# may be needed.  These predictive checks can help ensure that your
+# service dependency logic works well.
+# Values:
+#  0 = Disable predictive checks
+#  1 = Enable predictive checks (default)
+
+enable_predictive_service_dependency_checks=1
+
+
+
+# SOFT STATE DEPENDENCIES
+# This option determines whether or not Naemon will use soft state
+# information when checking host and service dependencies. Normally
+# Naemon will only use the latest hard host or service state when
+# checking dependencies. If you want it to use the latest state (regardless
+# of whether it's a soft or hard state type), enable this option.
+# Values:
+#  0 = Don't use soft state dependencies (default)
+#  1 = Use soft state dependencies
+
+soft_state_dependencies=0
+
+
+
+# TIME CHANGE ADJUSTMENT THRESHOLDS
+# These options determine when Naemon will react to detected changes
+# in system time (either forward or backwards).
+
+#time_change_threshold=900
+
+
+
+# TIMEOUT VALUES
+# These options control how much time Naemon will allow various
+# types of commands to execute before killing them off.  Options
+# are available for controlling maximum time allotted for
+# service checks, host checks, event handlers, notifications, the
+# ocsp command, and performance data commands.  All values are in
+# seconds.
+
+service_check_timeout=75
+host_check_timeout=30
+event_handler_timeout=30
+notification_timeout=30
+ocsp_timeout=5
+perfdata_timeout=5
+
+
+
+# RETAIN STATE INFORMATION
+# This setting determines whether or not Naemon will save state
+# information for services and hosts before it shuts down.  Upon
+# startup Naemon will reload all saved service and host state
+# information before starting to monitor.  This is useful for
+# maintaining long-term data on state statistics, etc, but will
+# slow Naemon down a bit when it (re)starts.  Since it's only
+# a one-time penalty, I think it's well worth the additional
+# startup delay.
+
+retain_state_information=1
+
+
+
+# STATE RETENTION FILE
+# This is the file that Naemon should use to store host and
+# service state information before it shuts down.  The state
+# information in this file is also read immediately prior to
+# starting to monitor the network when Naemon is restarted.
+# This file is used only if the retain_state_information
+# variable is set to 1.
+
+state_retention_file=/var/lib/naemon/retention.dat
+
+
+
+# RETENTION DATA UPDATE INTERVAL
+# This setting determines how often (in minutes) that Naemon
+# will automatically save retention data during normal operation.
+# If you set this value to 0, Naemon will not save retention
+# data at regular interval, but it will still save retention
+# data before shutting down or restarting.  If you have disabled
+# state retention, this option has no effect.
+
+retention_update_interval=60
+
+
+
+# USE RETAINED PROGRAM STATE
+# This setting determines whether or not Naemon will set
+# program status variables based on the values saved in the
+# retention file.  If you want to use retained program status
+# information, set this value to 1.  If not, set this value
+# to 0.
+
+use_retained_program_state=1
+
+
+
+# USE RETAINED SCHEDULING INFO
+# This setting determines whether or not Naemon will retain
+# the scheduling info (next check time) for hosts and services
+# based on the values saved in the retention file.
+# If you want to use retained scheduling info, set this
+# value to 1.  If not, set this value to 0.
+
+use_retained_scheduling_info=1
+
+
+
+# RETAINED ATTRIBUTE MASKS (ADVANCED FEATURE)
+# The following variables are used to specify specific host and
+# service attributes that should *not* be retained by Naemon during
+# program restarts.
+#
+# The values of the masks are bitwise ORs of values specified
+# by the "MODATTR_" definitions found in include/common.h.
+# For example, if you do not want the current enabled/disabled state
+# of flap detection and event handlers for hosts to be retained, you
+# would use a value of 24 for the host attribute mask...
+# MODATTR_EVENT_HANDLER_ENABLED (8) + MODATTR_FLAP_DETECTION_ENABLED (16) = 24
+
+# This mask determines what host attributes are not retained
+retained_host_attribute_mask=0
+
+# This mask determines what service attributes are not retained
+retained_service_attribute_mask=0
+
+# These two masks determine what process attributes are not retained.
+# There are two masks, because some process attributes have host and service
+# options.  For example, you can disable active host checks, but leave active
+# service checks enabled.
+retained_process_host_attribute_mask=0
+retained_process_service_attribute_mask=0
+
+# These two masks determine what contact attributes are not retained.
+# There are two masks, because some contact attributes have host and
+# service options.  For example, you can disable host notifications for
+# a contact, but leave service notifications enabled for them.
+retained_contact_host_attribute_mask=0
+retained_contact_service_attribute_mask=0
+
+
+
+# INTERVAL LENGTH
+# This is the seconds per unit interval as used in the
+# host/contact/service configuration files.  Setting this to 60 means
+# that each interval is one minute long (60 seconds).  Other settings
+# have not been tested much, so your mileage is likely to vary...
+
+interval_length=60
+
+
+
+# AGGRESSIVE HOST CHECKING OPTION
+# If you don't want to turn on aggressive host checking features, set
+# this value to 0 (the default).  Otherwise set this value to 1 to
+# enable the aggressive check option.  Read the docs for more info
+# on what aggressive host check is or check out the source code in
+# base/checks.c
+
+use_aggressive_host_checking=0
+
+
+
+# SERVICE CHECK EXECUTION OPTION
+# This determines whether or not Naemon will actively execute
+# service checks when it initially starts.  If this option is
+# disabled, checks are not actively made, but Naemon can still
+# receive and process passive check results that come in.  Unless
+# you're implementing redundant hosts or have a special need for
+# disabling the execution of service checks, leave this enabled!
+# Values: 1 = enable checks, 0 = disable checks
+
+execute_service_checks=1
+
+
+
+# PASSIVE SERVICE CHECK ACCEPTANCE OPTION
+# This determines whether or not Naemon will accept passive
+# service checks results when it initially (re)starts.
+# Values: 1 = accept passive checks, 0 = reject passive checks
+
+accept_passive_service_checks=1
+
+
+
+# HOST CHECK EXECUTION OPTION
+# This determines whether or not Naemon will actively execute
+# host checks when it initially starts.  If this option is
+# disabled, checks are not actively made, but Naemon can still
+# receive and process passive check results that come in.  Unless
+# you're implementing redundant hosts or have a special need for
+# disabling the execution of host checks, leave this enabled!
+# Values: 1 = enable checks, 0 = disable checks
+
+execute_host_checks=1
+
+
+
+# PASSIVE HOST CHECK ACCEPTANCE OPTION
+# This determines whether or not Naemon will accept passive
+# host checks results when it initially (re)starts.
+# Values: 1 = accept passive checks, 0 = reject passive checks
+
+accept_passive_host_checks=1
+
+
+
+# NOTIFICATIONS OPTION
+# This determines whether or not Naemon will send out any host or
+# service notifications when it is initially (re)started.
+# Values: 1 = enable notifications, 0 = disable notifications
+
+enable_notifications=1
+
+
+
+# EVENT HANDLER USE OPTION
+# This determines whether or not Naemon will run any host or
+# service event handlers when it is initially (re)started.  Unless
+# you're implementing redundant hosts, leave this option enabled.
+# Values: 1 = enable event handlers, 0 = disable event handlers
+
+enable_event_handlers=1
+
+
+
+# PROCESS PERFORMANCE DATA OPTION
+# This determines whether or not Naemon will process performance
+# data returned from service and host checks.  If this option is
+# enabled, host performance data will be processed using the
+# host_perfdata_command (defined below) and service performance
+# data will be processed using the service_perfdata_command (also
+# defined below).  Read the HTML docs for more information on
+# performance data.
+# Values: 1 = process performance data, 0 = do not process performance data
+
+process_performance_data=0
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA PROCESSING COMMANDS
+# These commands are run after every host and service check is
+# performed.  These commands are executed only if the
+# process_performance_data option (above) is set to 1.  The command
+# argument is the short name of a command definition that you
+# define in your host configuration file.  Read the HTML docs for
+# more information on performance data.
+
+#host_perfdata_command=process-host-perfdata
+#service_perfdata_command=process-service-perfdata
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA FILES
+# These files are used to store host and service performance data.
+# Performance data is only written to these files if the
+# process_performance_data option (above) is set to 1.
+
+#host_perfdata_file=/var/lib/naemon/host-perfdata
+#service_perfdata_file=/var/lib/naemon/service-perfdata
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA FILE TEMPLATES
+# These options determine what data is written (and how) to the
+# performance data files.  The templates may contain macros, special
+# characters (\t for tab, \r for carriage return, \n for newline)
+# and plain text.  A newline is automatically added after each write
+# to the performance data file.  Some examples of what you can do are
+# shown below.
+
+#host_perfdata_file_template=[HOSTPERFDATA]\t$TIMET$\t$HOSTNAME$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$
+#service_perfdata_file_template=[SERVICEPERFDATA]\t$TIMET$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA FILE MODES
+# This option determines whether or not the host and service
+# performance data files are opened in write ("w") or append ("a")
+# mode. If you want to use named pipes, you should use the special
+# pipe ("p") mode which avoids blocking at startup, otherwise you will
+# likely want the default append ("a") mode.
+
+#host_perfdata_file_mode=a
+#service_perfdata_file_mode=a
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA FILE PROCESSING INTERVAL
+# These options determine how often (in seconds) the host and service
+# performance data files are processed using the commands defined
+# below.  A value of 0 indicates the files should not be periodically
+# processed.
+
+#host_perfdata_file_processing_interval=0
+#service_perfdata_file_processing_interval=0
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA FILE PROCESSING COMMANDS
+# These commands are used to periodically process the host and
+# service performance data files.  The interval at which the
+# processing occurs is determined by the options above.
+
+#host_perfdata_file_processing_command=process-host-perfdata-file
+#service_perfdata_file_processing_command=process-service-perfdata-file
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA PROCESS EMPTY RESULTS
+# These options determine whether the core will process empty perfdata
+# results or not. This is needed for distributed monitoring, and intentionally
+# turned on by default.
+# If you don't require empty perfdata - saving some cpu cycles
+# on unwanted macro calculation - you can turn that off. Be careful!
+# Values: 1 = enable, 0 = disable
+
+#host_perfdata_process_empty_results=1
+#service_perfdata_process_empty_results=1
+
+
+# OBSESS OVER SERVICE CHECKS OPTION
+# This determines whether or not Naemon will obsess over service
+# checks and run the ocsp_command defined below.  Unless you're
+# planning on implementing distributed monitoring, do not enable
+# this option.  Read the HTML docs for more information on
+# implementing distributed monitoring.
+# Values: 1 = obsess over services, 0 = do not obsess (default)
+
+obsess_over_services=<%= @naemon_url.nil? ? 0 : 1 %>
+
+
+
+# OBSESSIVE COMPULSIVE SERVICE PROCESSOR COMMAND
+# This is the command that is run for every service check that is
+# processed by Naemon.  This command is executed only if the
+# obsess_over_services option (above) is set to 1.  The command
+# argument is the short name of a command definition that you
+# define in your host configuration file. Read the HTML docs for
+# more information on implementing distributed monitoring.
+
+<% if !@naemon_url.nil? %>
+ocsp_command=notify-master
+<% end %>
+
+
+
+# OBSESS OVER HOST CHECKS OPTION
+# This determines whether or not Naemon will obsess over host
+# checks and run the ochp_command defined below.  Unless you're
+# planning on implementing distributed monitoring, do not enable
+# this option.  Read the HTML docs for more information on
+# implementing distributed monitoring.
+# Values: 1 = obsess over hosts, 0 = do not obsess (default)
+
+obsess_over_hosts=0
+
+
+
+# OBSESSIVE COMPULSIVE HOST PROCESSOR COMMAND
+# This is the command that is run for every host check that is
+# processed by Naemon.  This command is executed only if the
+# obsess_over_hosts option (above) is set to 1.  The command
+# argument is the short name of a command definition that you
+# define in your host configuration file. Read the HTML docs for
+# more information on implementing distributed monitoring.
+
+#ochp_command=somecommand
+
+
+
+# TRANSLATE PASSIVE HOST CHECKS OPTION
+# This determines whether or not Naemon will translate
+# DOWN/UNREACHABLE passive host check results into their proper
+# state for this instance of Naemon.  This option is useful
+# if you have distributed or failover monitoring setup.  In
+# these cases your other Naemon servers probably have a different
+# "view" of the network, with regards to the parent/child relationship
+# of hosts.  If a distributed monitoring server thinks a host
+# is DOWN, it may actually be UNREACHABLE from the point of
+# this Naemon instance.  Enabling this option will tell Naemon
+# to translate any DOWN or UNREACHABLE host states it receives
+# passively into the correct state from the view of this server.
+# Values: 1 = perform translation, 0 = do not translate (default)
+
+translate_passive_host_checks=0
+
+
+
+# PASSIVE HOST CHECKS ARE SOFT OPTION
+# This determines whether or not Naemon will treat passive host
+# checks as being HARD or SOFT.  By default, a passive host check
+# result will put a host into a HARD state type.  This can be changed
+# by enabling this option.
+# Values: 0 = passive checks are HARD, 1 = passive checks are SOFT
+
+passive_host_checks_are_soft=0
+
+
+
+# ORPHANED HOST/SERVICE CHECK OPTIONS
+# These options determine whether or not Naemon will periodically
+# check for orphaned host and service checks.  Since service checks are
+# not rescheduled until the results of their previous execution
+# instance are processed, there exists a possibility that some
+# checks may never get rescheduled.  A similar situation exists for
+# host checks, although the exact scheduling details differ a bit
+# from service checks.  Orphaned checks seem to be a rare
+# problem and should not happen under normal circumstances.
+# If you have problems with service checks never getting
+# rescheduled, make sure you have orphaned service checks enabled.
+# Values: 1 = enable checks, 0 = disable checks
+
+check_for_orphaned_services=1
+check_for_orphaned_hosts=1
+
+
+
+# SERVICE FRESHNESS CHECK OPTION
+# This option determines whether or not Naemon will periodically
+# check the "freshness" of service results.  Enabling this option
+# is useful for ensuring passive checks are received in a timely
+# manner.
+# Values: 1 = enabled freshness checking, 0 = disable freshness checking
+
+check_service_freshness=1
+
+
+
+# SERVICE FRESHNESS CHECK INTERVAL
+# This setting determines how often (in seconds) Naemon will
+# check the "freshness" of service check results.  If you have
+# disabled service freshness checking, this option has no effect.
+
+service_freshness_check_interval=60
+
+
+
+# SERVICE CHECK TIMEOUT STATE
+# This setting determines the state Naemon will report when a
+# service check times out - that is, it does not respond within
+# service_check_timeout seconds.  This can be useful if a
+# machine is running at too high a load and you do not want
+# to consider a failed service check to be critical (the default).
+# Valid settings are:
+# c - Critical (default)
+# u - Unknown
+# w - Warning
+# o - OK
+
+service_check_timeout_state=c
+
+
+
+# HOST FRESHNESS CHECK OPTION
+# This option determines whether or not Naemon will periodically
+# check the "freshness" of host results.  Enabling this option
+# is useful for ensuring passive checks are received in a timely
+# manner.
+# Values: 1 = enabled freshness checking, 0 = disable freshness checking
+
+check_host_freshness=0
+
+
+
+# HOST FRESHNESS CHECK INTERVAL
+# This setting determines how often (in seconds) Naemon will
+# check the "freshness" of host check results.  If you have
+# disabled host freshness checking, this option has no effect.
+
+host_freshness_check_interval=60
+
+
+
+
+# ADDITIONAL FRESHNESS THRESHOLD LATENCY
+# This setting determines the number of seconds that Naemon
+# will add to any host and service freshness thresholds that
+# it calculates (those not explicitly specified by the user).
+
+additional_freshness_latency=15
+
+
+
+
+# FLAP DETECTION OPTION
+# This option determines whether or not Naemon will try
+# and detect hosts and services that are "flapping".
+# Flapping occurs when a host or service changes between
+# states too frequently.  When Naemon detects that a
+# host or service is flapping, it will temporarily suppress
+# notifications for that host/service until it stops
+# flapping.  Flap detection is very experimental, so read
+# the HTML documentation before enabling this feature!
+# Values: 1 = enable flap detection
+#         0 = disable flap detection (default)
+
+enable_flap_detection=1
+
+
+
+# FLAP DETECTION THRESHOLDS FOR HOSTS AND SERVICES
+# Read the HTML documentation on flap detection for
+# an explanation of what this option does.  This option
+# has no effect if flap detection is disabled.
+
+low_service_flap_threshold=5.0
+high_service_flap_threshold=20.0
+low_host_flap_threshold=5.0
+high_host_flap_threshold=20.0
+
+
+
+# DATE FORMAT OPTION
+# This option determines how short dates are displayed. Valid options
+# include:
+#    us             (MM-DD-YYYY HH:MM:SS)
+#    euro           (DD-MM-YYYY HH:MM:SS)
+#    iso8601        (YYYY-MM-DD HH:MM:SS)
+#    strict-iso8601 (YYYY-MM-DDTHH:MM:SS)
+#
+
+date_format=iso8601
+
+
+
+
+# TIMEZONE OFFSET
+# This option is used to override the default timezone that this
+# instance of Naemon runs in.  If not specified, Naemon will use
+# the system configured timezone.
+
+#use_timezone=US/Mountain
+#use_timezone=Australia/Brisbane
+
+
+
+# ILLEGAL OBJECT NAME CHARACTERS
+# This option allows you to specify illegal characters that cannot
+# be used in host names, service descriptions, or names of other
+# object types.
+
+illegal_object_name_chars=`~!$%^&*|'"<>?,()=
+
+
+
+# ILLEGAL MACRO OUTPUT CHARACTERS
+# This option allows you to specify illegal characters that are
+# stripped from macros before being used in notifications, event
+# handlers, etc.  This DOES NOT affect macros used in service or
+# host check commands.
+# The following macros are stripped of the characters you specify:
+#    $HOSTOUTPUT$
+#    $HOSTPERFDATA$
+#    $HOSTACKAUTHOR$
+#    $HOSTACKCOMMENT$
+#    $SERVICEOUTPUT$
+#    $SERVICEPERFDATA$
+#    $SERVICEACKAUTHOR$
+#    $SERVICEACKCOMMENT$
+
+illegal_macro_output_chars=`~$&|'"<>
+
+
+
+# REGULAR EXPRESSION MATCHING
+# This option controls whether or not regular expression matching
+# takes place in the object config files.  Regular expression
+# matching is used to match host, hostgroup, service, and service
+# group names/descriptions in some fields of various object types.
+# Values: 1 = enable regexp matching, 0 = disable regexp matching
+
+use_regexp_matching=0
+
+
+
+# "TRUE" REGULAR EXPRESSION MATCHING
+# This option controls whether or not "true" regular expression
+# matching takes place in the object config files.  This option
+# only has an effect if regular expression matching is enabled
+# (see above).  If this option is DISABLED, regular expression
+# matching only occurs if a string contains wildcard characters
+# (* and ?).  If the option is ENABLED, regexp matching occurs
+# all the time (which can be annoying).
+# Values: 1 = enable true matching, 0 = disable true matching
+
+use_true_regexp_matching=0
+
+
+
+# ADMINISTRATOR EMAIL/PAGER ADDRESSES
+# The email and pager address of a global administrator (likely you).
+# Naemon never uses these values itself, but you can access them by
+# using the $ADMINEMAIL$ and $ADMINPAGER$ macros in your notification
+# commands.
+
+admin_email=naemon@localhost
+admin_pager=pagenaemon@localhost
+
+
+
+# DEBUG LEVEL
+# This option determines how much (if any) debugging information will
+# be written to the debug file.  OR values together to log multiple
+# types of information.
+# Values:
+#      -1 = Everything
+#       0 = Nothing
+#       1 = Functions
+#       2 = Configuration
+#       4 = Process information
+#       8 = Scheduled events
+#      16 = Host/service checks
+#      32 = Notifications
+#      64 = Event broker
+#     128 = External commands
+#     256 = Commands
+#     512 = Scheduled downtime
+#    1024 = Comments
+#    2048 = Macros
+
+debug_level=0
+
+
+
+# DEBUG VERBOSITY
+# This option determines how verbose the debug log output will be.
+# Values: 0 = Brief output
+#         1 = More detailed
+#         2 = Very detailed
+
+debug_verbosity=1
+
+
+
+# DEBUG FILE
+# This option determines where Naemon should write debugging information.
+
+debug_file=/var/lib/naemon/naemon.debug
+
+
+
+# MAX DEBUG FILE SIZE
+# This option determines the maximum size (in bytes) of the debug file.  If
+# the file grows larger than this size, it will be renamed with a .old
+# extension.  If a file already exists with a .old extension it will
+# automatically be deleted.  This helps ensure your disk space usage doesn't
+# get out of control when debugging Naemon.
+
+max_debug_file_size=1000000
+
+
+
+# Whether to allow hostgroups that have no hosts assigned.  This defaults
+# to off (0), since that was the old behavior.
+
+allow_empty_hostgroup_assignment=0
+
+
+
+# Normally worker count is dynamically allocated based on 1.5 * number of CPUs
+# with a minimum of 4 workers.  This value will override the defaults
+
+#check_workers=3
+
+# CIRCULAR DEPENDENCIES (EXPERIMENTAL)
+# Allow for circular dependencies in naemon's host graph.
+# Enabling this will cause propagation of the following to stop working:
+# * scheduling downtime
+# * enabling notification
+# * disabling notification
+# This feature is experimental and bugs might occur.
+
+allow_circular_dependencies=0
diff --git a/modules/profile/templates/monitoring/resource.cfg.erb b/modules/profile/templates/monitoring/resource.cfg.erb

new file mode 100644 (file)

index 0000000..5a5c3ee
--- /dev/null
+++ b/modules/profile/templates/monitoring/resource.cfg.erb
@@ -0,0 +1,30 @@
+###########################################################################
+#
+# RESOURCE.CFG - Sample Resource File for Naemon 1.0.7
+#
+#
+# You can define $USERx$ macros in this file, which can in turn be used
+# in command definitions in your host config file(s).  $USERx$ macros are
+# useful for storing sensitive information such as usernames, passwords,
+# etc.  They are also handy for specifying the path to plugins and
+# event handlers - if you decide to move the plugins or event handlers to
+# a different directory in the future, you can just update one or two
+# $USERx$ macros, instead of modifying a lot of command definitions.
+#
+# Naemon supports up to 256 $USERx$ macros ($USER1$ through $USER256$)
+#
+# Resource files may also be used to store configuration directives for
+# external data sources like MySQL...
+#
+###########################################################################
+
+# Sets $USER1$ to the path of the monitoring plugins, and $USER2$ to the
+# plugins path supplied by the template variable @plugins
+$USER1$=/usr/lib/monitoring-plugins
+$USER2$=<%= @plugins %>
+
+# Sample (left commented out, since $USER2$ is already assigned above):
+# sets $USER2$ to be the path to event handlers
+#$USER2$=/usr/lib/monitoring-plugins/eventhandlers
+
+# Store some usernames and passwords (hidden from the CGIs)
+#$USER3$=someuser
+#$USER4$=somepassword
diff --git a/modules/profile/templates/monitoring/send_nrdp.sh.erb b/modules/profile/templates/monitoring/send_nrdp.sh.erb

new file mode 100755 (executable)

index 0000000..41f58e5
--- /dev/null
+++ b/modules/profile/templates/monitoring/send_nrdp.sh.erb
@@ -0,0 +1,271 @@
+#!/bin/bash
+#
+# send_nrdp.sh
+#
+# Copyright (c) 2010-2017 - Nagios Enterprises, LLC.
+# Written by: Scott Wilkerson (nagios@nagios.org)
+#
+# 2017-09-25 Troy Lea aka BOX293
+#  - Fixed script not working with arguments when run as a cron job
+#    or if being used as a nagios command like obsessive compulsive.
+#     ... "if [ ! -t 0 ]" was the reason why.
+# 2017-12-08 Jørgen van der Meulen (Conclusion Xforce)
+#  - Fixed typo in NRDP abbreviation
+
+
+PROGNAME=$(basename $0)
+RELEASE="Revision 0.6.1"
+
+print_release() {
+    echo "$RELEASE"
+}
+
+print_usage() {
+    echo ""
+    echo "$PROGNAME $RELEASE - Send NRDP script for Nagios"
+    echo ""
+    echo "Usage: send_nrdp.sh -u URL -t token [options]"
+    echo ""
+    echo "Usage: $PROGNAME -h display help"
+    echo ""
+}
+
+print_help() {
+        print_usage
+        echo ""
+        echo "This script is used to send NRDP data to a Nagios server"
+        echo ""
+        echo "Required:"
+        echo "    -u","    URL of NRDP server.  Usually http://<IP_ADDRESS>/nrdp/"
+        echo "    -t","    Shared token.  Must be the same token set in NRDP Server"
+        echo ""
+        echo "Options:"
+        echo "    Single Check:"
+        echo "        -H    host name"
+        echo "        -s    service name"
+        echo "        -S    State"
+        echo "        -o     output"
+        echo ""
+        echo "    STDIN:"
+        echo "        [-d    delimiter] (default -d \"\\t\")"
+        echo "        With only the required parameters $PROGNAME is capable of"
+        echo "        processing data piped to it either from a file or other"
+        echo "        process.  By default, we use \t as the delimiter however this"
+        echo "        may be specified with the -d option data should be in the"
+        echo "        following formats one entry per line."
+        echo "        For Host checks:"
+        echo "        hostname    State    output"
+        echo "        For Service checks"
+        echo "        hostname    servicename    State    output"
+        echo ""
+        echo "    File:"
+        echo "        -f /full/path/to/file"
+        echo "        This file will be sent to the NRDP server specified in -u"
+        echo "        The file should be an XML file in the following format"
+        echo "        ##################################################"
+        echo ""
+        echo "        <?xml version='1.0'?>"
+        echo "        <checkresults>"
+        echo "          <checkresult type=\"host\" checktype=\"1\">"
+        echo "            <hostname>YOUR_HOSTNAME</hostname>"
+        echo "            <state>0</state>"
+        echo "            <output>OK|perfdata=1.00;5;10;0</output>"
+        echo "          </checkresult>"
+        echo "          <checkresult type=\"service\" checktype=\"1\">"
+        echo "            <hostname>YOUR_HOSTNAME</hostname>"
+        echo "            <servicename>YOUR_SERVICENAME</servicename>"
+        echo "            <state>0</state>"
+        echo "            <output>OK|perfdata=1.00;5;10;0</output>"
+        echo "          </checkresult>"
+        echo "        </checkresults>"
+        echo "        ##################################################"
+        echo ""
+        echo "    Directory:"
+        echo "        -D /path/to/temp/dir"
+        echo "        This is a directory that contains XML files in the format"
+        echo "        above.  Additionally, if the -d flag is specified, $PROGNAME"
+        echo "        will create temp files here if the server could not be reached."
+        echo "        On additional calls with the same -D path, if a connection to"
+        echo "        the server is successful, all temp files will be sent."
+        exit 0
+}
+
+send_data() {
+    pdata="token=$token&cmd=submitcheck"
+    if [ $file ]; then
+        fdata="--data-urlencode XMLDATA@$file"
+        rslt=`curl -f --silent --insecure -d "$pdata" $fdata "$url/"`
+    else
+        pdata="$pdata&XMLDATA=$1"
+        rslt=`curl -f --silent --insecure -d "$pdata" "$url/"`
+    fi
+    
+    ret=$?
+
+    status=`echo $rslt | sed -n 's|.*<status>\(.*\)</status>.*|\1|p'`
+    message=`echo $rslt | sed -n 's|.*<message>\(.*\)</message>.*|\1|p'`
+    if [ $ret != 0 ];then
+        echo "ERROR: could not connect to NRDP server at $url"
+        # verify we are not processing the directory already and then write to the directory
+        if [ ! "$2" ] && [ $directory ];then
+            if [ ! -d "$directory" ];then
+                mkdir -p "$directory"
+            fi
+            # This is where we write to the tmp directory
+            echo $xml > `mktemp $directory/nrdp.XXXXXX`
+        fi
+        exit 1
+    fi
+    
+    if [ "$status" != "0" ];then
+        # This means we couldn't connect to NRPD server
+        echo "ERROR: The NRDP Server said $message"
+        # verify we are not processing the directory already and then write to the directory
+        if [ ! "$2" ] && [ $directory ];then
+            if [ ! -d "$directory" ];then
+                mkdir -p "$directory"
+            fi
+            # This is where we write to the tmp directory
+            echo $xml > `mktemp $directory/nrdp.XXXXXX`
+        fi
+        
+        exit 2
+    fi
+    
+    # If this was a directory call and was successful, remove the file
+    if [ $2 ] && [ "$status" == "0" ];then
+        rm -f "$2"
+    fi
+
+    # If we weren't successful error
+    if [ $ret != 0 ];then
+        echo "exited with error "$ret
+        exit $ret
+    fi
+}
+
+# Parse parameters
+# url and token default to the values rendered by the ERB template;
+# -u and -t override them.
+url="<%= @naemon_url %>"
+token="<%= @naemon_token %>"
+
+while getopts "u:t:H:s:S:o:f:d:c:D:hv" option
+do
+  case $option in
+    u) url=$OPTARG ;;
+    t) token=$OPTARG ;;
+    H) host=$OPTARG ;;
+    s) service=$OPTARG ;;
+    S) State=$OPTARG ;;
+    o) output=$OPTARG ;;
+    f) file=$OPTARG ;;
+    d) delim=$OPTARG ;;
+    c) checktype=$OPTARG ;;
+    D) directory=$OPTARG ;;
+    h) print_help 0;;
+    v) print_release
+        exit 0 ;;
+  esac
+done
+
+# Defaults: checktype 1 and a tab delimiter.  The tests are quoted so that
+# a delimiter made only of whitespace (e.g. -d " ") is kept rather than
+# being mistaken for "not given".
+if [ -z "$checktype" ]; then
+  checktype=1
+fi
+if [ -z "$delim" ]; then
+  delim=$'\t'
+fi
+
+if [ -z "$url" ] || [ -z "$token" ]
+then
+  echo "Usage: send_nrdp -u url -t token"
+  exit 1
+fi
+
+# curl is the only HTTP client this script uses
+if ! command -v curl >/dev/null 2>&1
+then
+  echo "curl is required to run $PROGNAME"
+  exit 1
+fi
+
+checkcount=0
+
+if [ $host ]; then
+    xml=""
+    # we are not getting piped results
+    if [ "$host" == "" ] || [ "$State" == "" ]; then
+        echo "You must provide a host -H and State -S"
+        exit 2
+    fi
+    if [ "$service" != "" ]; then
+        xml="$xml<checkresult type='service' checktype='$checktype'><servicename>$service</servicename>"
+    else
+        xml="$xml<checkresult type='host' checktype='$checktype'>"
+    fi
+    
+    # urlencode XML special chars
+    output=${output//&/%26}
+    output=${output//</%3C}
+    output=${output//>/%3E}
+    
+    xml="$xml<hostname>$host</hostname><state>$State</state><output><![CDATA["$output"]]></output></checkresult>"
+    checkcount=1
+fi
+
+ # If only url and token have been provided then it is assumed that data is being piped
+########################
+if [[ ! $host && ! $State && ! $file && ! $directory ]]; then
+    xml=""
+    # we know we are being piped results
+    IFS=$delim
+    
+    while read -r line ; do
+        arr=($line)
+        if [ ${#arr[@]} != 0 ];then
+            if [[ ${#arr[@]} < 3 ]] || [[ ${#arr[@]} > 4 ]];then
+                echo "ERROR: STDIN must be either 3 or 4 fields long, I found "${#arr[@]}
+            else
+                if [ ${#arr[@]} == 4 ]; then
+                    xml="$xml<checkresult type='service' checktype='$checktype'>
+                    <servicename>${arr[1]}</servicename>
+                    <hostname>${arr[0]}</hostname>
+                    <state>${arr[2]}</state>
+                    <output>${arr[3]}</output>"
+                else
+                    xml="$xml<checkresult type='host' checktype='$checktype'>
+                    <hostname>${arr[0]}</hostname>
+                    <state>${arr[1]}</state>
+                    <output>${arr[2]}</output>"
+                fi
+                
+                xml="$xml</checkresult>"
+                checkcount=$[checkcount+1]
+            fi
+        fi
+    done
+    IFS=" "
+fi
+
+if [ $file ]; then
+    xml=`cat $file`
+    send_data "$xml"
+fi
+
+if [ $directory ]; then
+    #echo "Processing directory..."
+    for f in `ls $directory`
+    do
+      #echo "Processing $f file..."
+      # take action on each file. $f store current file name
+      xml=`cat $directory/$f`
+      send_data "$xml" "$directory/$f"
+    done
+fi
+
+if [ "x$file" == "x" ] && [ "x$directory" == "x" ]; then
+    xml="<?xml version='1.0'?><checkresults>$xml</checkresults>"
+    send_data "$xml"
+    echo "Sent $checkcount checks to $url"
+fi
diff --git a/modules/role/manifests/backup.pp b/modules/role/manifests/backup.pp

index b35c54270fbe2f573700ab782840fe10e32653b0..6b8d00c00d93b10af27ba96724931eadbecaa5ac 100644 (file)
--- a/modules/role/manifests/backup.pp
+++ b/modules/role/manifests/backup.pp
@@ -14,6 +14,7 @@ class role::backup (
    include "profile::xmr_stak"
    include "profile::known_hosts"
    include "profile::boinc"
+  include "profile::monitoring"
  
    include "role::backup::postgresql"
  
@@ -124,5 +125,11 @@ class role::backup (
          order   => "$order_dirname-$order_part",
        }
      }
+
+    @profile::monitoring::local_service { "Last backup in $base is not too old":
+      local => {
+        check_command => "check_last_file_date!$base!14",
+      }
+    }
    }
  }
diff --git a/modules/role/manifests/caldance.pp b/modules/role/manifests/caldance.pp

index 75d9dbd0dc2f5b8c91167954017ca65b4ae8c7bb..b7948e65647e59288f7950c7bab5a7569b8d8ab1 100644 (file)
--- a/modules/role/manifests/caldance.pp
+++ b/modules/role/manifests/caldance.pp
@@ -6,6 +6,7 @@ class role::caldance (
    include "profile::postgresql"
    include "profile::apache"
    include "profile::redis"
+  include "profile::monitoring"
  
    ensure_packages(["python-pip", "python-virtualenv", "python-django"])
  }
diff --git a/modules/role/manifests/cryptoportfolio.pp b/modules/role/manifests/cryptoportfolio.pp

index c675e91421b7c22f5d5af5d3f14e7b969a542800..8f7bfca5ffe0ad8e3be1f452cb22de2dd346a830 100644 (file)
--- a/modules/role/manifests/cryptoportfolio.pp
+++ b/modules/role/manifests/cryptoportfolio.pp
@@ -41,7 +41,6 @@ class role::cryptoportfolio (
      contain "role::cryptoportfolio::bot"
    }
  
-  # FIXME: restore backup
    unless empty($front_version) {
      contain "role::cryptoportfolio::front"
    }
diff --git a/modules/role/manifests/etherpad.pp b/modules/role/manifests/etherpad.pp

index 5ab5023124a28805980a03d5c72c08273db0054c..119af5653d808284e8d0f81763404ede9d691e9b 100644 (file)
--- a/modules/role/manifests/etherpad.pp
+++ b/modules/role/manifests/etherpad.pp
@@ -22,6 +22,7 @@ class role::etherpad (
    include "profile::tools"
    include "profile::postgresql"
    include "profile::apache"
+  include "profile::monitoring"
  
    ensure_packages(["npm"])
    ensure_packages(["abiword"])
@@ -121,4 +122,17 @@ class role::etherpad (
      proxy_preserve_host => true;
      default: *          => $::profile::apache::apache_vhost_default;
    }
+
+  @profile::monitoring::external_service { "Etherpad service is running on $web_host":
+    type   => "web",
+    master => {
+      check_command => "check_https!$web_host!/!<title>Etherpad"
+    }
+  }
+  @profile::monitoring::external_service { "$web_host ssl certificate is up to date":
+    type   => "web",
+    master => {
+      check_command => "check_https_certificate!$web_host"
+    }
+  }
  }
diff --git a/readme.md b/readme.md

new file mode 100644 (file)

index 0000000..74bb294
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,31 @@
+# Puppet configuration repository for immae.eu's services
+
+This repository aims to help automate the installation of servers
+planned for a specific task, with the help of Puppet. The hosts are
+supposed to be listed in an LDAP-like database, which will contain the
+necessary credentials, variable configuration and secrets for each
+server.
+
+## Structure
+
+The repository is structured along Puppet modules (`modules/`
+directory). Each machine has one or several `role`s, which determine the
+set of programs and configuration to install. Each role may be
+standalone, or require a set of `profile`s, each of which is seen as a
+reusable component. (The structure is inspired by the tutorial at
+[https://www.craigdunn.org/2012/05/239/](https://www.craigdunn.org/2012/05/239/) )
+
+
+## TODO
+
+- Complete documentation
+- Add some monitoring:
+  - modules/profile/manifests/postgresql/ssl.pp (check postgresql certificate)
+  - modules/profile/manifests/postgresql/backup\_pgbouncer.pp (check pgbouncer works)
+  - modules/profile/manifests/mail.pp (check e-mails are going through)
+  - modules/profile/manifests/redis.pp (check redis is running)
+  - modules/role/manifests/cryptoportfolio (role-specific checks)
+- Add redis replication and dumps
+- Restore backups for cryptoportfolio
+- Ensure latest by default for packages
+- try to do a mkfs.ext4 for cloud vps
diff --git a/scripts/ovh_cloud_instance/arch_host_script.sh b/scripts/ovh_cloud_instance/arch_host_script.sh

index 42dcc4a9d8636e07e7d5323051d685ada2cacb04..378b0bef92189092b3a9c5ab9344a15d9d08f93d 100755 (executable)
--- a/scripts/ovh_cloud_instance/arch_host_script.sh
+++ b/scripts/ovh_cloud_instance/arch_host_script.sh
@@ -25,7 +25,7 @@ PART="/dev/disk/by-uuid/$UUID"
  # mkfs.ext4 -F -U "$UUID" "$DEVICE"
  sudo mount "$DEVICE" /mnt
  
-##### FIXME: mkfs.ext4 would be better ####
+##### mkfs.ext4 would be better ####
  for i in /mnt/*; do
    if [ "$i" = "/mnt/boot" ]; then
      # keep /boot/grub
@@ -34,7 +34,7 @@ for i in /mnt/*; do
      sudo rm -rf $i
    fi
  done
-##### /FIXME ####
+##### / ####
  
  sudo pacstrap -G /mnt base git puppet
  
diff --git a/scripts/ovh_vps_ssd/arch_chroot_script.sh b/scripts/ovh_vps_ssd/arch_chroot_script.sh

index 7b7887fd873f1f40e840383c05d31d6dc2bcd15f..57e793bfe9bd51a12059463d4ce9896743525b5a 100755 (executable)
--- a/scripts/ovh_vps_ssd/arch_chroot_script.sh
+++ b/scripts/ovh_vps_ssd/arch_chroot_script.sh
@@ -10,7 +10,7 @@ DEVICE=$(realpath "$PART")
  # mkfs.ext4 -F -U "$UUID" "$DEVICE"
  mount "$DEVICE" /mnt
  
-##### FIXME: mkfs.ext4 would be better ####
+##### mkfs.ext4 would be better ####
  for i in /mnt/*; do
    if [ "$i" = "/mnt/boot" ]; then
      # keep /boot/grub
@@ -19,7 +19,7 @@ for i in /mnt/*; do
      rm -rf $i
    fi
  done
-##### /FIXME ####
+##### / ####
  
  pacstrap -G /mnt base git puppet
author	Ismaël Bouya <ismael.bouya@normalesup.org>
	Wed, 11 Jul 2018 07:31:24 +0000 (09:31 +0200)
committer	Ismaël Bouya <ismael.bouya@normalesup.org>
	Wed, 11 Jul 2018 07:31:24 +0000 (09:31 +0200)
modules/base_installation/lib/puppet/provider/package/pacman.rb	[new file with mode: 0644]	patch \| blob
modules/base_installation/lib/puppet/provider/package/pip2.rb	[new file with mode: 0644]	patch \| blob
modules/base_installation/manifests/package_managers.pp		patch \| blob \| blame \| history
modules/profile/files/monitoring/check_command	[new file with mode: 0644]	patch \| blob
modules/profile/files/monitoring/check_last_file_date	[new file with mode: 0644]	patch \| blob
modules/profile/files/monitoring/check_md_raid	[new file with mode: 0644]	patch \| blob
modules/profile/files/monitoring/check_postgres_replication	[new file with mode: 0644]	patch \| blob
modules/profile/manifests/fstab.pp		patch \| blob \| blame \| history
modules/profile/manifests/monitoring.pp	[new file with mode: 0644]	patch \| blob
modules/profile/manifests/monitoring/commands.pp	[new file with mode: 0644]	patch \| blob
modules/profile/manifests/monitoring/contacts.pp	[new file with mode: 0644]	patch \| blob
modules/profile/manifests/monitoring/external_service.pp	[new file with mode: 0644]	patch \| blob
modules/profile/manifests/monitoring/hosts.pp	[new file with mode: 0644]	patch \| blob
modules/profile/manifests/monitoring/local_service.pp	[new file with mode: 0644]	patch \| blob
modules/profile/manifests/monitoring/params.pp	[new file with mode: 0644]	patch \| blob
modules/profile/manifests/monitoring/services.pp	[new file with mode: 0644]	patch \| blob
modules/profile/manifests/monitoring/times.pp	[new file with mode: 0644]	patch \| blob
modules/profile/manifests/postgresql.pp		patch \| blob \| blame \| history
modules/profile/manifests/postgresql/backup_dump.pp		patch \| blob \| blame \| history
modules/profile/manifests/postgresql/backup_pgbouncer.pp		patch \| blob \| blame \| history
modules/profile/manifests/postgresql/master.pp		patch \| blob \| blame \| history
modules/profile/manifests/postgresql/ssl.pp		patch \| blob \| blame \| history
modules/profile/templates/monitoring/naemon.cfg.erb	[new file with mode: 0644]	patch \| blob
modules/profile/templates/monitoring/resource.cfg.erb	[new file with mode: 0644]	patch \| blob
modules/profile/templates/monitoring/send_nrdp.sh.erb	[new file with mode: 0755]	patch \| blob
modules/role/manifests/backup.pp		patch \| blob \| blame \| history
modules/role/manifests/caldance.pp		patch \| blob \| blame \| history
modules/role/manifests/cryptoportfolio.pp		patch \| blob \| blame \| history
modules/role/manifests/etherpad.pp		patch \| blob \| blame \| history
readme.md	[new file with mode: 0644]	patch \| blob
scripts/ovh_cloud_instance/arch_host_script.sh		patch \| blob \| blame \| history
scripts/ovh_vps_ssd/arch_chroot_script.sh		patch \| blob \| blame \| history