aboutsummaryrefslogblamecommitdiff
path: root/modules/private/monitoring/objects_common.nix
blob: 10cc1495e0c5703c9471c65fbe025fca958b76c6 (plain) (tree)
1
2
3
4
5
6
7
8
9
          
          
                    

                      
                  

                        
                   

                          

       

        

     










                                                                                         





                           

                                     
                                           



             
                                       




                                                           
                                       









                                                      
                                       













                                                



                                           

                                                   


        
                                       




                                                     
                                       




                                                 
                                       




                                                           
                                       










                                                         
                                                                      

                                                                                                                


                                                                         








                                                                                                                                 

                                                                            
                                                                   
                                                                      
                                                                                    
                                                              
                                                                                     
                                                                                           
                                                                                      
                                                                         
                                                     
                                                         
                                                                                                       
                                                                                                                             
                                                                                                                                                 
                                                          
                                                                                            


                                                                                    
                                                            


                                                                                                      

                                                                
 



                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                           

                                                                                                                                                                           
 
                                                                                                                                                                                          












                                              












                                                                     
                                             
    


                                                  














                                             
                                                            





                                           
                                       






                                 































                                              


                                      









                                                                  


















                                           
                                       



        
# Common monitoring objects shared by every monitored host.
#
# Arguments:
#   hostFQDN      name, alias and address of the host object; also the
#                 host_name of local services
#   hostName      value exported as the host's _webstatus_name
#   interface     network interface passed to the bandwidth check
#   processWarn / processAlert
#                 warning / critical thresholds of the process-count check
#   load*Warn / load*Alert
#                 warning / critical load thresholds; the 5 and 15 variants
#                 default to the previous one in the chain
#   mdadm         interpolated as the prefix of "/bin/mdadm"
#   sudo          interpolated as the command prefix of checks that run as
#                 another user
#   master        boolean; service notifications are enabled only when true
#   lib           nixpkgs library (attribute-set helpers)
{
  hostFQDN,
  hostName,
  interface ? "eth0",
  processWarn ? "250",
  processAlert ? "400",
  loadWarn ? "0.9",
  load5Warn ? loadWarn,
  load15Warn ? load5Warn,
  loadAlert ? "1.0",
  load5Alert ? loadAlert,
  load15Alert ? load5Alert,
  mdadm,
  sudo,
  master,
  lib,
  ...
}:
let
  # Settings attached (as `passiveInfo`) to every local service below.
  # NOTE(review): presumably consumed elsewhere to declare the passive
  # counterpart of each service — confirm against the consumer.
  defaultPassiveInfo =
    let
      # Keeps the service description and every custom ("_"-prefixed) key.
      isKeptKey = name: name == "service_description" || lib.hasPrefix "_" name;
    in
    {
      host_name = hostFQDN;
      use = "external-passive-service";
      servicegroups = "webstatus-resources";
      freshness_threshold = "450";
      retry_interval = "1";
      # Function: attribute set -> attribute set restricted to the kept keys.
      filter = lib.filterAttrs (name: _value: isKeptKey name);
    };
in
{
  # The single host object declared by this file, keyed by its FQDN.
  host = {
    "${hostFQDN}" = {
      # Template and group membership.
      use = "linux-server";
      hostgroups = "webstatus-hosts";

      # The FQDN doubles as display alias and check address.
      address = hostFQDN;
      alias = hostFQDN;

      # Custom variables.
      _webstatus_vhost = "status.immae.eu";
      _webstatus_name = hostName;
    };
  };
  # Local resource checks for this host.
  service =
    let
      # Builds one check: every entry uses the "local-service" template and
      # carries the default passive information.
      localCheck = service_description: check_command: {
        inherit service_description check_command;
        passiveInfo = defaultPassiveInfo;
        use = "local-service";
      };
    in
    [
      # Arguments after the command name: warning, critical, partition.
      (localCheck "Size on root partition" [ "check_local_disk" "20%" "10%" "/" ])

      # Arguments after the command name: warning, critical, state filter.
      (localCheck "Total number of process" [
        "check_local_procs"
        processWarn
        processAlert
        "RSZDT"
      ])

      # Overrides the template's scheduling for this check only.
      (localCheck "Network bandwidth" [
        "check_local_bandwidth"
        interface
        "20480" # kb/s
        "51200" # kb/s
      ] // {
        check_interval = "2";
        max_check_attempts = "20";
        retry_interval = "2";
      })

      # One comma-separated triple for warning, one for critical.
      (localCheck "Average load" [
        "check_local_load"
        "${loadWarn},${load5Warn},${load15Warn}"
        "${loadAlert},${load5Alert},${load15Alert}"
      ])

      (localCheck "Swap usage" [ "check_local_swap" "20" "10" ])

      (localCheck "Memory usage" [ "check_memory" "80" "90" ])

      (localCheck "NTP is activated and working" [ "check_ntp" ])

      # The command output is matched against "^$" (empty output).
      (localCheck "No mdadm array is degraded" [
        "check_command_output"
        "${mdadm}/bin/mdadm --monitor --scan -1"
        "^$"
        "-s 0 -r root"
      ])
    ];
  # Command lines, grouped by topic. Attribute order is not significant.
  command = {
    # --- DNS ---
    check_dns = "$USER1$/check_dns -H $ARG1$ -s $HOSTADDRESS$ $ARG2$";
    check_external_dns = "$USER1$/check_dns -H $ARG2$ -s $ARG1$ $ARG3$";

    # --- E-mail ---
    check_emails = "$USER2$/check_emails -H $HOSTADDRESS$ -i $USER203$ -l $ARG1$ -p $ARG2$ -s $ARG3$ -f $ARG4$";
    check_emails_local = "$USER2$/check_emails -H $HOSTADDRESS$ -n $ARG1$ -r $ADMINEMAIL$ -s $ARG2$ -f $ARG3$";
    check_imap_connection = "$USER2$/check_imap_connection -u \"$USER204$\" -p \"$USER205$\" -H \"imap.immae.eu:143\"";
    check_mailq = "$USER1$/check_mailq -s -w 1 -c 2";
    check_smtp = "$USER1$/check_smtp -H $HOSTADDRESS$ -p 25 -S -D 21,15";

    # --- Backups ---
    check_backup_eriomem = "$USER2$/check_eriomem $USER208$";
    check_backup_eriomem_age = "$USER2$/check_backup_eriomem_age $ARG1$";
    check_backup_ovh_age = "$USER2$/check_backup_ovh_age $ARG1$";
    check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\"";

    # --- HTTP(S) ---
    check_http = "$USER1$/check_http --sni -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\"";
    check_https = "$USER1$/check_http --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\"";
    check_https_auth = "$USER1$/check_http --sni --ssl -a \"$USER202$\" -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\"";
    check_https_certificate = "$USER1$/check_http --sni --ssl -H \"$ARG1$\" -C 21,15";
    check_https_code = "$USER1$/check_http --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -e \"$ARG3$\" -r \"$ARG4$\"";

    # --- Local resources ---
    check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$";
    check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$";
    check_local_load = "$USER1$/check_load -r -w $ARG1$ -c $ARG2$";
    check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$";
    check_local_bandwidth = "$USER2$/check_bandwidth -i=$ARG1$ -w $ARG2$ -c $ARG3$";
    check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
    check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org";
    check_megaraid = "$USER2$/check_megaraid_sas --sudo";
    check_zfs = "$USER2$/check_zpool.sh -p ALL -w 80 -c 90";

    # --- Generic wrappers around an arbitrary command ($ARG1$) ---
    check_command_match = "$USER2$/check_command -c \"$ARG1$\" -C \"$ARG2$\" $ARG3$";
    check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";
    check_command_status = "$USER2$/check_command -c \"$ARG1$\" -s \"$ARG2$\" $ARG3$";

    # --- Replication (each run through sudo as the service's own user) ---
    check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\"";
    check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\"";
    check_openldap_replication = "${sudo} -u openldap $USER2$/check_openldap_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\" \"$ARG4$\" \"$ARG5$\"";
    check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\"";

    # --- Other remote services ---
    check_ftp_database = "$USER2$/check_ftp_database";
    check_git = "$USER2$/check_git $USER203$";
    check_ovh_sms = "$USER2$/check_ovh_sms \"$USER209$\"";
    check_tcp = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -e \"$ARG2$\" -Mcrit";
    check_tcp_ssl = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -S -D 21,15";

    # --- Host liveness and dummy results ---
    check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5";
    check_ok = "$USER1$/check_dummy 0 \"Dummy OK\"";
    check_critical = "$USER1$/check_dummy 2 \"Dummy CRITICAL\"";

    # --- Notifications ---
    # The trailing $OVE in both e-mail commands forces naemon to run them via
    # a shell instead of execve, which fails here.
    # NOTE(review): the host variant exports $SERVICENOTIFICATIONID$ — confirm
    # this is what notify_by_email expects for host notifications.
    notify-host-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
    notify-service-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
    notify-by-slack = "HOST=\"$HOSTALIAS$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_slack \"$ARG1$\" \"$ARG2$\"";
    notify-host-eban-url = "STATUS_NAME=\"Server\" USER=\"$USER210$\" PASSWORD=\"$USER211$\" HOSTSTATE=\"$HOSTSTATE$\" $USER2$/notify_eban_url";
    notify-service-eban-url = "STATUS_NAME=\"$_SERVICEWEBSTATUS_NAME$\" USER=\"$USER210$\" PASSWORD=\"$USER211$\" SERVICESTATE=\"$SERVICESTATE$\" $USER2$/notify_eban_url";

    # Sends the service state, output and perfdata through send_nrdp.sh.
    notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$ | $SERVICEPERFDATA$\"";
  };
  # Time periods referenced by the templates.
  timeperiod = {
    # Every weekday covers the full day.
    "24x7" =
      lib.genAttrs
        [ "monday" "tuesday" "wednesday" "thursday" "friday" "saturday" "sunday" ]
        (_day: "00:00-24:00")
      // {
        alias = "24 Hours A Day, 7 Days A Week";
      };
  };
  # Service groups, written as "group name = alias" and expanded into
  # `{ alias = ...; }` attribute sets.
  servicegroup = lib.mapAttrs (_group: alias: { inherit alias; }) {
    webstatus-webapps = "Web applications";
    webstatus-websites = "Personal websites";
    webstatus-ssl = "SSL certificates";
    webstatus-dns = "DNS resolution";
    webstatus-remote-services = "Other remote services";
    webstatus-local-services = "Other local services";
    webstatus-email = "E-mail services";
    webstatus-resources = "Local resources";
    webstatus-databases = "Databases resources";
    webstatus-backup = "Backup resources";
  };
  # Host groups; the host declared above is a member of "webstatus-hosts".
  hostgroup = {
    webstatus-hosts = {
      alias = "Hosts";
    };
  };
  # Contact groups; "admins" is the group referenced by the templates.
  contactgroup = {
    admins = {
      alias = "Naemon Administrators";
    };
  };
  templates = {
    # Service templates: one base template plus per-category schedules.
    service =
      let
        # Every concrete template derives from "generic-service".
        fromGeneric = settings: { use = "generic-service"; } // settings;

        # Schedule shared by local and external services.
        frequentSchedule = {
          check_interval = "5";
          max_check_attempts = "4";
          retry_interval = "1";
        };

        # Schedule shared by both web templates.
        webSchedule = {
          check_interval = "20";
          max_check_attempts = "2";
          retry_interval = "1";
        };
      in
      {
        generic-service = {
          # Check scheduling.
          active_checks_enabled = "1";
          passive_checks_enabled = "1";
          check_freshness = "0";
          check_period = "24x7";
          check_interval = "10";
          retry_interval = "2";
          max_check_attempts = "3";

          # Notifications: only enabled when `master` is true.
          notifications_enabled = if master then "1" else "0";
          contact_groups = "admins";
          notification_interval = "60";
          notification_options = "w,u,c,r,f,s";
          notification_period = "24x7";

          # Miscellaneous behaviour.
          event_handler_enabled = "1";
          flap_detection_enabled = "1";
          is_volatile = "0";
          obsess_over_service = "1";
          process_perf_data = "1";
          retain_nonstatus_information = "1";
          retain_status_information = "1";

          _webstatus_namespace = "immae";
        };

        # Checks bound to this host.
        local-service = fromGeneric (frequentSchedule // {
          host_name = hostFQDN;
          servicegroups = "webstatus-resources";
        });

        external-service = fromGeneric frequentSchedule;

        web-service = fromGeneric webSchedule;

        external-web-service = fromGeneric webSchedule;

        mail-service = fromGeneric {
          check_interval = "15";
          max_check_attempts = "1";
          retry_interval = "1";
        };

        dns-service = fromGeneric {
          check_interval = "120";
          notification_interval = "120";
          max_check_attempts = "5";
          retry_interval = "5";
        };
      };
    # Only a contact template is declared: there is no contact here, we go
    # through master.
    contact = {
      generic-contact = {
        # Host notifications.
        host_notification_commands = "notify-host-by-email";
        host_notification_options = "d,u,r,f,s";
        host_notification_period = "24x7";

        # Service notifications.
        service_notification_commands = "notify-service-by-email";
        service_notification_options = "w,u,c,r,f,s";
        service_notification_period = "24x7";
      };
    };
    # Host templates.
    host = {
      generic-host = {
        notifications_enabled = "1";
        notification_period = "24x7";
        event_handler_enabled = "1";
        flap_detection_enabled = "1";
        process_perf_data = "1";
        retain_nonstatus_information = "1";
        retain_status_information = "1";
      };

      # Template used by the host object declared in this file.
      # NOTE(review): unlike the service templates, this one has no
      # `use = "generic-host"`, so generic-host is not referenced in this
      # file — confirm whether that is intentional.
      linux-server = {
        # Liveness check scheduling.
        check_command = "check_host_alive";
        check_period = "24x7";
        check_interval = "5";
        retry_interval = "1";
        max_check_attempts = "10";

        # Notifications.
        contact_groups = "admins";
        notification_interval = "120";
        notification_options = "d,u,r,f";

        _webstatus_namespace = "immae";
      };
    };
  };
}