aboutsummaryrefslogblamecommitdiff
path: root/modules/private/monitoring/objects_common.nix
blob: 66fb812bf1ba342cccd89614aa0a1324d34935ba (plain) (tree)











































































                                                                                           
                                                                                                       
                                                                                                                             
                                                                                                                                                 
                                                                                            





































































































                                                                                                                                                                                                                                                                                                       
# Common Naemon object definitions for one monitored host.
# This file is a function: given the arguments below it returns an attribute
# set with the keys host, service, command, timeperiod, contactgroup and
# templates.
{ hostFQDN # used as the host object's name, alias and address, and as host_name of the local-service template
, processWarn ? "250" # warning threshold handed to check_local_procs (-w)
, processAlert ? "400" # critical threshold handed to check_local_procs (-c)
, loadWarn ? "8.0" # warning load value; repeated three times to build the check_load -w triple
, loadAlert ? "10.0" # critical load value; repeated three times to build the check_load -c triple
, mdadm # interpolated as "${mdadm}/bin/mdadm", so presumably the mdadm package -- confirm with caller
, sudo # interpolated directly as a command prefix, so presumably the path of the sudo executable -- confirm with caller
, ... # any further arguments are accepted and ignored
}:
{
  # Single host object, keyed by the host's FQDN.
  host = let
    # The FQDN doubles as display alias and as the address that gets checked;
    # everything else comes from the "linux-server" host template.
    thisHost = {
      use = "linux-server";
      address = hostFQDN;
      alias = hostFQDN;
    };
  in {
    "${hostFQDN}" = thisHost;
  };
  # Checks executed locally on this host. Every entry uses the "local-service"
  # template. check_command is a list whose first element names an entry of
  # `command` and whose remaining elements fill its $ARG1$, $ARG2$, ... macros.
  service = [
    {
      # Warn below 20% free space on "/", critical below 10%.
      service_description = "Size on root partition";
      use = "local-service";
      check_command = ["check_local_disk" "20%" "10%" "/"];
    }
    {
      # Process count thresholds come from the processWarn / processAlert
      # arguments; "RSZDT" is the process-state filter given to check_procs -s.
      service_description = "Total number of process";
      use = "local-service";
      check_command = [
        "check_local_procs"
        processWarn
        processAlert
        "RSZDT"
      ];
    }
    {
      # check_load expects three comma-separated values per threshold; the
      # same value is used for all three of them.
      service_description = "Average load";
      use = "local-service";
      check_command = [
        "check_local_load"
        "${loadWarn},${loadWarn},${loadWarn}"
        "${loadAlert},${loadAlert},${loadAlert}"
      ];
    }
    {
      # Warn below 20% free swap, critical below 10%.
      # The "%" suffix is required: check_swap reads a bare integer as a
      # number of free *bytes*, which would make the check practically never
      # fire. This also matches the percentage form used for the disk check.
      service_description = "Swap usage";
      use = "local-service";
      check_command = ["check_local_swap" "20%" "10%"];
    }
    {
      # Thresholds are passed as -w 80 -c 90 to the local check_mem.sh script.
      service_description = "Memory usage";
      use = "local-service";
      check_command = ["check_memory" "80" "90"];
    }
    {
      service_description = "NTP is activated and working";
      use = "local-service";
      check_command = ["check_ntp"];
    }
    {
      # Runs a single mdadm monitor pass and expects its output to match "^$"
      # (i.e. to be empty).
      # NOTE(review): the check_command_output definition already passes
      # "-s 0", so the "-s 0" in the extra-arguments string below looks
      # redundant -- confirm against the check_command script before removing.
      service_description = "No mdadm array is degraded";
      use = "local-service";
      check_command = [
        "check_command_output"
        "${mdadm}/bin/mdadm --monitor --scan -1"
        "^$"
        "-s 0 -r root"
      ];
    }
  ];
  # Command definitions referenced by the services and host templates.
  # $USER1$ / $USER2$ / $USER200$ / $USER201$ are resource macros resolved
  # outside this file (presumably $USER1$ is the stock plugin directory and
  # $USER2$ the directory of local scripts -- confirm in the resource config).
  command = let
    # Build a command line that runs a $USER2$ script as `user` through the
    # sudo path handed to this file.
    runAs = user: script: "${sudo} -u ${user} $USER2$/${script}";
  in {
    # Host reachability.
    check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5";

    # Local resource checks.
    check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$";
    check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$";
    check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$";
    check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$";
    check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
    check_mailq = "$USER1$/check_mailq -s -w 1 -c 2";
    check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org";

    # Generic "run a command and match its output" check; $ARG3$ carries
    # extra options for the script and is deliberately left unquoted.
    check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";

    # Checks that have to run under another account.
    check_mysql_replication = runAs "mysql" "check_mysql_replication \"$ARG1$\" \"$ARG2$\"";
    check_postgresql_replication = runAs "postgres" "check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\"";
    check_openldap_replication = runAs "openldap" "check_openldap_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\" \"$ARG4$\" \"$ARG5$\"";
    check_redis_replication = runAs "redis" "check_redis_replication \"$ARG1$\"";
    # Here the account itself is a check argument ($ARG3$).
    check_last_file_date = runAs "\"$ARG3$\"" "check_last_file_date \"$ARG1$\" \"$ARG2$\"";

    #  No notify commands, we go through master
    #  notify_host_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
    #  #$OVE is to force naemon to run via shell instead of execve which fails here
    #  notify_service_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
    #  #sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email
    #  #$OVE is to force naemon to run via shell instead of execve which fails here

    # Forwards a service result to the master through send_nrdp.sh.
    notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\"";
  };
  # The only time period defined here: every day of the week, around the clock.
  timeperiod = {
    "24x7" = let
      wholeDay = "00:00-24:00";
      weekdays = [
        "monday"
        "tuesday"
        "wednesday"
        "thursday"
        "friday"
        "saturday"
        "sunday"
      ];
      # One "<day> = 00:00-24:00" attribute per weekday.
      schedule = builtins.listToAttrs
        (map (day: { name = day; value = wholeDay; }) weekdays);
    in schedule // {
      alias = "24 Hours A Day, 7 Days A Week";
    };
  };
  # Contact group referenced by the service and host templates ("admins").
  contactgroup.admins.alias = "Naemon Administrators";
  # No contact, we go through master
  # contact = {
  #   immae = {
  #     alias = "Immae";
  #     use = "generic-contact";
  #     email = "xxxxxxxxxxxxxxxx";
  #   };
  # };
  # Object templates that the concrete hosts/services pull in through `use`.
  templates = let
    # Literals shared by several templates below.
    yes = "1";
    no = "0";
    always = "24x7";
    adminGroup = "admins";
  in {
    service = {
      # Base template for every service.
      generic-service = {
        # What gets checked, and when.
        active_checks_enabled = yes;
        passive_checks_enabled = yes;
        check_freshness = no;
        check_period = always;
        check_interval = "10";
        retry_interval = "2";
        max_check_attempts = "3";
        is_volatile = no;

        # Notifications.
        notifications_enabled = no; # no notification since we send them to master
        notification_period = always;
        notification_interval = "60";
        notification_options = "w,u,c,r,f,s";
        contact_groups = adminGroup;

        # Event handling, flapping, data retention.
        event_handler_enabled = yes;
        flap_detection_enabled = yes;
        obsess_over_service = yes;
        process_perf_data = yes;
        retain_nonstatus_information = yes;
        retain_status_information = yes;
      };
      # Services checked on this very host: bound to hostFQDN, with tighter
      # intervals than generic-service.
      local-service = {
        use = "generic-service";
        host_name = hostFQDN;
        check_interval = "5";
        retry_interval = "1";
        max_check_attempts = "4";
      };
    };
    # No contact, we go through master
    # contact = {
    #   generic-contact = {
    #     host_notification_commands = "notify_host_by_email";
    #     host_notification_options = "d,u,r,f,s";
    #     host_notification_period = "24x7";
    #     service_notification_commands = "notify_service_by_email";
    #     service_notification_options = "w,u,c,r,f,s";
    #     service_notification_period = "24x7";
    #   };
    # };
    host = {
      # NOTE(review): nothing visible in this file has `use = "generic-host"`
      # (linux-server below does not) -- confirm whether that is intended.
      generic-host = {
        notifications_enabled = yes;
        notification_period = always;
        event_handler_enabled = yes;
        flap_detection_enabled = yes;
        process_perf_data = yes;
        retain_nonstatus_information = yes;
        retain_status_information = yes;
      };
      # Template used by the host object of this file; liveness is checked
      # with the check_host_alive command.
      linux-server = {
        check_command = "check_host_alive";
        check_period = always;
        check_interval = "5";
        retry_interval = "1";
        max_check_attempts = "10";
        contact_groups = adminGroup;
        notification_interval = "120";
        notification_options = "d,u,r,f";
      };
    };
  };
}