]> git.immae.eu Git - perso/Immae/Config/Nix.git/blobdiff - modules/private/monitoring/objects_common.nix
Adjust load warnings for monitoring
[perso/Immae/Config/Nix.git] / modules / private / monitoring / objects_common.nix
index 66fb812bf1ba342cccd89614aa0a1324d34935ba..82043ebdd0b58d7944cb875f9616b3d5c4db6e64 100644 (file)
@@ -1,27 +1,50 @@
 { hostFQDN
+, hostName
+, interface ? "eth0"
 , processWarn ? "250"
 , processAlert ? "400"
 , loadWarn ? "8.0"
+, load5Warn ? loadWarn
+, load15Warn ? load5Warn
 , loadAlert ? "10.0"
+, load5Alert ? loadAlert
+, load15Alert ? load5Alert
 , mdadm
 , sudo
+, master
+, lib
 , ...
 }:
+let
+  defaultPassiveInfo = {  # shared value assigned to `passiveInfo` by the service entries in this file
+    filter = lib.attrsets.filterAttrs  # partially applied: still expects the attribute set to filter
+      (k: v: builtins.elem k ["service_description"] || builtins.substring 0 1 k == "_");  # keeps only "service_description" and attributes whose name starts with "_"
+    use = "external-passive-service";  # NOTE(review): this template is not defined in the visible part of the file; confirm where it is declared
+    freshness_threshold = "450";
+    retry_interval = "1";
+    servicegroups = "webstatus-resources";  # one of the groups declared under `servicegroup` in this file
+    host_name = hostFQDN;  # the `hostFQDN` function argument
+  };
+in
 {
   host = {  # host objects: a single entry keyed by the `hostFQDN` argument
     "${hostFQDN}" = {
       alias = hostFQDN;
       address = hostFQDN;  # the FQDN doubles as alias and address
       use = "linux-server";  # NOTE(review): template not defined in the visible part of the file; confirm where it is declared
+      hostgroups = "webstatus-hosts";  # group declared under `hostgroup` in this file
+      _webstatus_name = hostName;  # carries the `hostName` argument; presumably a custom object variable (leading "_"), verify against the consumer
     };
   };
   service = [
     {
+      passiveInfo = defaultPassiveInfo;
       service_description = "Size on root partition";
       use = "local-service";
       check_command = ["check_local_disk" "20%" "10%" "/"];
     }
     {
+      passiveInfo = defaultPassiveInfo;
       service_description = "Total number of process";
       use = "local-service";
       check_command = [
       ];
     }
     {
+      passiveInfo = defaultPassiveInfo;
+      service_description = "Network bandwidth";
+      use = "local-service";
+      check_interval = "2";
+      max_check_attempts = "20";
+      retry_interval = "2";
+      check_command = [
+        "check_local_bandwidth"
+        interface
+        "20480" # kb/s
+        "51200" # kb/s
+      ];
+    }
+    {
+      passiveInfo = defaultPassiveInfo;
       service_description = "Average load";
       use = "local-service";
       check_command = [
         "check_local_load"
-        "${loadWarn},${loadWarn},${loadWarn}"
-        "${loadAlert},${loadAlert},${loadAlert}"
+        "${loadWarn},${load5Warn},${load15Warn}"
+        "${loadAlert},${load5Alert},${load15Alert}"
       ];
     }
     {
+      passiveInfo = defaultPassiveInfo;
       service_description = "Swap usage";
       use = "local-service";
       check_command = ["check_local_swap" "20" "10"];
     }
     {
+      passiveInfo = defaultPassiveInfo;
       service_description = "Memory usage";
       use = "local-service";
       check_command = ["check_memory" "80" "90"];
     }
     {
+      passiveInfo = defaultPassiveInfo;
       service_description = "NTP is activated and working";
       use = "local-service";
       check_command = ["check_ntp"];
     }
     {
+      passiveInfo = defaultPassiveInfo;
       service_description = "No mdadm array is degraded";
       use = "local-service";
       check_command = [
     }
   ];
   command = {
+    check_dns = "$USER1$/check_dns -H $ARG1$ -s $HOSTADDRESS$ $ARG2$";
+    check_emails = "$USER2$/check_emails -H $HOSTADDRESS$ -i $USER203$ -l $ARG1$ -p $ARG2$ -s $ARG3$ -f $ARG4$";
+    check_emails_local = "$USER2$/check_emails -H $HOSTADDRESS$ -n $ARG1$ -r $ADMINEMAIL$ -s $ARG2$ -f $ARG3$";
+    check_backup_eriomem = "$USER2$/check_eriomem $USER208$";
+    check_backup_eriomem_age = "$USER2$/check_backup_eriomem_age $ARG1$";
+    check_backup_ovh_age = "$USER2$/check_backup_ovh_age $ARG1$";
+    check_external_dns = "$USER1$/check_dns -H $ARG2$ -s $ARG1$ $ARG3$";
+    check_ftp_database = "$USER2$/check_ftp_database";
+    check_git = "$USER2$/check_git $USER203$";
+    check_http = "$USER1$/check_http --sni -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\"";
+    check_https = "$USER1$/check_http --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\"";
+    check_https_auth = "$USER1$/check_http --sni --ssl -a \"$USER202$\" -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\"";
+    check_https_certificate = "$USER1$/check_http --sni --ssl -H \"$ARG1$\" -C 21,15";
+    check_https_code = "$USER1$/check_http --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -e \"$ARG3$\" -r \"$ARG4$\"";
+    check_imap_connection = "$USER2$/check_imap_connection -u \"$USER204$\" -p \"$USER205$\" -H \"imap.immae.eu:143\"";
     check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$";
     check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$";
     check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$";
     check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$";
+    check_local_bandwidth = "$USER2$/check_bandwidth -i=$ARG1$ -w $ARG2$ -c $ARG3$";
     check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
+    check_command_match = "$USER2$/check_command -c \"$ARG1$\" -C \"$ARG2$\" $ARG3$";
     check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";
+    check_command_status = "$USER2$/check_command -c \"$ARG1$\" -s \"$ARG2$\" $ARG3$";
     check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org";
+    check_mailq = "$USER1$/check_mailq -s -w 1 -c 2";
     check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\"";
     check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\"";
     check_openldap_replication = "${sudo} -u openldap $USER2$/check_openldap_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\" \"$ARG4$\" \"$ARG5$\"";
+    check_ovh_sms = "$USER2$/check_ovh_sms \"$USER209$\"";
     check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\"";
-    check_mailq = "$USER1$/check_mailq -s -w 1 -c 2";
+    check_smtp = "$USER1$/check_smtp -H $HOSTADDRESS$ -p 25 -S -D 21,15";
+    check_tcp = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -e \"$ARG2$\" -Mcrit";
+    check_tcp_ssl = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -S -D 21,15";
 
     check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5";
     check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\"";
+    check_ok = "$USER1$/check_dummy 0 \"Dummy OK\"";
+    check_critical = "$USER1$/check_dummy 2 \"Dummy CRITICAL\"";
 
-    #  No notify commands, we go through master
-    #  notify_host_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
-    #  #$OVE is to force naemon to run via shell instead of execve which fails here
-    #  notify_service_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
-    #  #sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email
-    #  #$OVE is to force naemon to run via shell instead of execve which fails here
+    # $OVE is to force naemon to run via shell instead of execve which fails here
+    notify-host-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
+    # $OVE is to force naemon to run via shell instead of execve which fails here
+    notify-service-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
+    notify-by-slack = "HOST=\"$HOSTALIAS$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_slack \"$ARG1$\" \"$ARG2$\"";
 
-    notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\"";
+    notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$ | $SERVICEPERFDATA$\"";
   };
   timeperiod = {
     "24x7" = {
       sunday = "00:00-24:00";
     };
   };
+  servicegroup = {  # service groups, each with a display alias; "webstatus-resources" is the one referenced by defaultPassiveInfo and the generic-service template
+    webstatus-webapps = { alias = "Web applications"; };
+    webstatus-websites = { alias = "Personal websites"; };
+    webstatus-ssl = { alias = "SSL certificates"; };
+    webstatus-dns = { alias = "DNS resolution"; };
+    webstatus-remote-services = { alias = "Other remote services"; };
+    webstatus-local-services = { alias = "Other local services"; };
+    webstatus-email = { alias = "E-mail services"; };
+    webstatus-resources = { alias = "Local resources"; };
+    webstatus-databases = { alias = "Databases resources"; };
+    webstatus-backup = { alias = "Backup resources"; };
+  };
+  hostgroup = {  # host groups; "webstatus-hosts" is the group assigned to the host object in this file
+    webstatus-hosts = { alias = "Hosts"; };
+  };
   contactgroup = {  # contact groups: only "admins" is declared here
     admins = { alias = "Naemon Administrators"; };
   };
-  # No contact, we go through master
-  # contact = {
-  #   immae = {
-  #     alias = "Immae";
-  #     use = "generic-contact";
-  #     email = "xxxxxxxxxxxxxxxx";
-  #   };
-  # };
   templates = {
     service = {
       generic-service = {
         notification_interval = "60";
         notification_options = "w,u,c,r,f,s";
         notification_period = "24x7";
-        notifications_enabled = "0"; # no notification since we send them to master
+        notifications_enabled = if master then "1" else "0";
         obsess_over_service = "1";
         passive_checks_enabled = "1";
         process_perf_data = "1";
         check_interval = "5";
         max_check_attempts = "4";
         retry_interval = "1";
+        servicegroups = "webstatus-resources";
+      };
+      external-service = {
+        use = "generic-service";
+        check_interval = "5";
+        max_check_attempts = "4";
+        retry_interval = "1";
+      };
+      web-service = {
+        use = "generic-service";
+        check_interval = "20";
+        max_check_attempts = "2";
+        retry_interval = "1";
+      };
+      external-web-service = {
+        use = "generic-service";
+        check_interval = "20";
+        max_check_attempts = "2";
+        retry_interval = "1";
+      };
+      mail-service = {
+        use = "generic-service";
+        check_interval = "15";
+        max_check_attempts = "1";
+        retry_interval = "1";
+      };
+      dns-service = {
+        use = "generic-service";
+        check_interval = "120";
+        notification_interval = "120";
+        max_check_attempts = "5";
+        retry_interval = "5";
       };
     };
     # Contact template used for e-mail notifications (generic-service enables notifications only when `master` is true)
-    contact = {
-      generic-contact = {
-    #     host_notification_commands = "notify_host_by_email";
-        host_notification_options = "d,u,r,f,s";
-        host_notification_period = "24x7";
-    #     service_notification_commands = "notify_service_by_email";
-        service_notification_options = "w,u,c,r,f,s";
-        service_notification_period = "24x7";
-      };
-    };
+    contact = {
+      generic-contact = {
+        host_notification_commands = "notify-host-by-email";
+        host_notification_options = "d,u,r,f,s";
+        host_notification_period = "24x7";
+        service_notification_commands = "notify-service-by-email";
+        service_notification_options = "w,u,c,r,f,s";
+        service_notification_period = "24x7";
+      };
+    };
     host = {
       generic-host = {
         event_handler_enabled = "1";