X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=modules%2Fprivate%2Fmonitoring%2Fobjects_common.nix;h=7c9f6420e13fa1edfebff3239e08fd28678c0b02;hb=acab8301f6a0a76d97b36d009a60d4172bf62981;hp=1ab9fc3e179d3ced392eaefe714526c1c3486a67;hpb=6015a3b52c3b155ac444aeb39950c38a5e653101;p=perso%2FImmae%2FConfig%2FNix.git diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix index 1ab9fc3..7c9f642 100644 --- a/modules/private/monitoring/objects_common.nix +++ b/modules/private/monitoring/objects_common.nix @@ -1,27 +1,51 @@ { hostFQDN +, hostName +, interface ? "eth0" , processWarn ? "250" , processAlert ? "400" -, loadWarn ? "8.0" -, loadAlert ? "10.0" +, loadWarn ? "0.9" +, load5Warn ? loadWarn +, load15Warn ? load5Warn +, loadAlert ? "1.0" +, load5Alert ? loadAlert +, load15Alert ? load5Alert , mdadm -, sudo +, master +, lib +, mypluginsConfig , ... }: +let + defaultPassiveInfo = { + filter = lib.attrsets.filterAttrs + (k: v: builtins.elem k ["service_description"] || builtins.substring 0 1 k == "_"); + use = "external-passive-service"; + freshness_threshold = "450"; + retry_interval = "1"; + servicegroups = "webstatus-resources"; + host_name = hostFQDN; + }; +in { host = { "${hostFQDN}" = { alias = hostFQDN; address = hostFQDN; use = "linux-server"; + hostgroups = "webstatus-hosts"; + _webstatus_name = hostName; + _webstatus_vhost = "status.immae.eu"; }; }; service = [ { + passiveInfo = defaultPassiveInfo; service_description = "Size on root partition"; use = "local-service"; check_command = ["check_local_disk" "20%" "10%" "/"]; } { + passiveInfo = defaultPassiveInfo; service_description = "Total number of process"; use = "local-service"; check_command = [ @@ -32,30 +56,49 @@ ]; } { + passiveInfo = defaultPassiveInfo; + service_description = "Network bandwidth"; + use = "local-service"; + check_interval = "2"; + max_check_attempts = "20"; + retry_interval = "2"; + check_command = [ + "check_local_bandwidth" + interface + "20480" # kb/s + "51200" # kb/s + ]; + } + { + passiveInfo = defaultPassiveInfo; service_description = "Average load"; use = "local-service"; check_command = [ "check_local_load" - "${loadWarn},${loadWarn},${loadWarn}" - "${loadAlert},${loadAlert},${loadAlert}" + "${loadWarn},${load5Warn},${load15Warn}" + "${loadAlert},${load5Alert},${load15Alert}" ]; } { + passiveInfo = defaultPassiveInfo; service_description = "Swap usage"; use = "local-service"; check_command = ["check_local_swap" "20" "10"]; } { + passiveInfo = defaultPassiveInfo; service_description = "Memory usage"; use = "local-service"; check_command = ["check_memory" "80" "90"]; } { + passiveInfo = defaultPassiveInfo; service_description = "NTP is activated and working"; use = "local-service"; check_command = ["check_ntp"]; } { + passiveInfo = defaultPassiveInfo; service_description = "No mdadm array is degraded"; use = "local-service"; check_command = [ @@ -66,30 +109,18 @@ ]; } ]; - command = { + command = lib.foldr (v: o: o // (v.commands or {})) {} (builtins.attrValues mypluginsConfig) // { check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$"; check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$"; - check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$"; + check_local_load = "$USER1$/check_load -r -w $ARG1$ -c $ARG2$"; check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$"; - check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$"; - check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org"; - check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\""; - check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\""; - check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\""; check_mailq = "$USER1$/check_mailq -s -w 1 -c 2"; + check_smtp = "$USER1$/check_smtp -H $HOSTADDRESS$ -p 25 -S -D 21,15"; check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5"; - check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\""; - - # No notify commands, we go through master - # notify_host_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; - # #$OVE is to force naemon to run via shell instead of execve which fails here - # notify_service_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; - # #sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email - # #$OVE is to force naemon to run via shell instead of execve which fails here - - notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\""; + check_ok = "$USER1$/check_dummy 0 \"Dummy OK\""; + check_critical = "$USER1$/check_dummy 2 \"Dummy CRITICAL\""; }; timeperiod = { "24x7" = { @@ -103,17 +134,24 @@ sunday = "00:00-24:00"; }; }; + servicegroup = { + webstatus-webapps = { alias = "Web applications"; }; + webstatus-websites = { alias = "Personal websites"; }; + webstatus-ssl = { alias = "SSL certificates"; }; + webstatus-dns = { alias = "DNS resolution"; }; + webstatus-remote-services = { alias = "Other remote services"; }; + webstatus-local-services = { alias = "Other local services"; }; + webstatus-email = { alias = "E-mail services"; }; + webstatus-resources = { alias = "Local resources"; }; + webstatus-databases = { alias = "Databases resources"; }; + webstatus-backup = { alias = "Backup resources"; }; + }; + hostgroup = { + webstatus-hosts = { alias = "Hosts"; }; + }; contactgroup = { admins = { alias = "Naemon Administrators"; }; }; - # No contact, we go through master - # contact = { - # immae = { - # alias = "Immae"; - # use = "generic-contact"; - # email = "xxxxxxxxxxxxxxxx"; - # }; - # }; templates = { service = { generic-service = { @@ -129,13 +167,14 @@ notification_interval = "60"; notification_options = "w,u,c,r,f,s"; notification_period = "24x7"; - notifications_enabled = "0"; # no notification since we send them to master + notifications_enabled = if master then "1" else "0"; obsess_over_service = "1"; passive_checks_enabled = "1"; process_perf_data = "1"; retain_nonstatus_information = "1"; retain_status_information = "1"; retry_interval = "2"; + _webstatus_namespace = "immae"; }; local-service = { use = "generic-service"; @@ -143,19 +182,51 @@ check_interval = "5"; max_check_attempts = "4"; retry_interval = "1"; + servicegroups = "webstatus-resources"; + }; + external-service = { + use = "generic-service"; + check_interval = "5"; + max_check_attempts = "4"; + retry_interval = "1"; + }; + web-service = { + use = "generic-service"; + check_interval = "20"; + max_check_attempts = "2"; + retry_interval = "1"; + }; + external-web-service = { + use = "generic-service"; + check_interval = "20"; + max_check_attempts = "2"; + retry_interval = "1"; + }; + mail-service = { + use = "generic-service"; + check_interval = "15"; + max_check_attempts = "1"; + retry_interval = "1"; + }; + dns-service = { + use = "generic-service"; + check_interval = "120"; + notification_interval = "120"; + max_check_attempts = "5"; + retry_interval = "5"; }; }; # No contact, we go through master - # contact = { - # generic-contact = { - # host_notification_commands = "notify_host_by_email"; - # host_notification_options = "d,u,r,f,s"; - # host_notification_period = "24x7"; - # service_notification_commands = "notify_service_by_email"; - # service_notification_options = "w,u,c,r,f,s"; - # service_notification_period = "24x7"; - # }; - # }; + contact = { + generic-contact = { + host_notification_commands = "notify-host-by-email"; + host_notification_options = "d,u,r,f,s"; + host_notification_period = "24x7"; + service_notification_commands = "notify-service-by-email"; + service_notification_options = "w,u,c,r,f,s"; + service_notification_period = "24x7"; + }; + }; host = { generic-host = { event_handler_enabled = "1"; @@ -175,6 +246,7 @@ notification_interval = "120"; notification_options = "d,u,r,f"; retry_interval = "1"; + _webstatus_namespace = "immae"; }; }; };