X-Git-Url: https://git.immae.eu/?p=perso%2FImmae%2FConfig%2FNix.git;a=blobdiff_plain;f=modules%2Fprivate%2Fmonitoring%2Fobjects_common.nix;h=82043ebdd0b58d7944cb875f9616b3d5c4db6e64;hp=66fb812bf1ba342cccd89614aa0a1324d34935ba;hb=7ad4966f41db0669a77c7a6ee7f87f0d4e586b0c;hpb=16b80abd57bb215d0e72f3983f997a007743b8fb diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix index 66fb812..82043eb 100644 --- a/modules/private/monitoring/objects_common.nix +++ b/modules/private/monitoring/objects_common.nix @@ -1,27 +1,50 @@ { hostFQDN +, hostName +, interface ? "eth0" , processWarn ? "250" , processAlert ? "400" , loadWarn ? "8.0" +, load5Warn ? loadWarn +, load15Warn ? load5Warn , loadAlert ? "10.0" +, load5Alert ? loadAlert +, load15Alert ? load5Alert , mdadm , sudo +, master +, lib , ... }: +let + defaultPassiveInfo = { + filter = lib.attrsets.filterAttrs + (k: v: builtins.elem k ["service_description"] || builtins.substring 0 1 k == "_"); + use = "external-passive-service"; + freshness_threshold = "450"; + retry_interval = "1"; + servicegroups = "webstatus-resources"; + host_name = hostFQDN; + }; +in { host = { "${hostFQDN}" = { alias = hostFQDN; address = hostFQDN; use = "linux-server"; + hostgroups = "webstatus-hosts"; + _webstatus_name = hostName; }; }; service = [ { + passiveInfo = defaultPassiveInfo; service_description = "Size on root partition"; use = "local-service"; check_command = ["check_local_disk" "20%" "10%" "/"]; } { + passiveInfo = defaultPassiveInfo; service_description = "Total number of process"; use = "local-service"; check_command = [ @@ -32,30 +55,49 @@ ]; } { + passiveInfo = defaultPassiveInfo; + service_description = "Network bandwidth"; + use = "local-service"; + check_interval = "2"; + max_check_attempts = "20"; + retry_interval = "2"; + check_command = [ + "check_local_bandwidth" + interface + "20480" # kb/s + "51200" # kb/s + ]; + } + { + passiveInfo = defaultPassiveInfo; service_description = "Average load"; use = "local-service"; check_command = [ "check_local_load" - "${loadWarn},${loadWarn},${loadWarn}" - "${loadAlert},${loadAlert},${loadAlert}" + "${loadWarn},${load5Warn},${load15Warn}" + "${loadAlert},${load5Alert},${load15Alert}" ]; } { + passiveInfo = defaultPassiveInfo; service_description = "Swap usage"; use = "local-service"; check_command = ["check_local_swap" "20" "10"]; } { + passiveInfo = defaultPassiveInfo; service_description = "Memory usage"; use = "local-service"; check_command = ["check_memory" "80" "90"]; } { + passiveInfo = defaultPassiveInfo; service_description = "NTP is activated and working"; use = "local-service"; check_command = ["check_ntp"]; } { + passiveInfo = defaultPassiveInfo; service_description = "No mdadm array is degraded"; use = "local-service"; check_command = [ @@ -67,30 +109,53 @@ } ]; command = { + check_dns = "$USER1$/check_dns -H $ARG1$ -s $HOSTADDRESS$ $ARG2$"; + check_emails = "$USER2$/check_emails -H $HOSTADDRESS$ -i $USER203$ -l $ARG1$ -p $ARG2$ -s $ARG3$ -f $ARG4$"; + check_emails_local = "$USER2$/check_emails -H $HOSTADDRESS$ -n $ARG1$ -r $ADMINEMAIL$ -s $ARG2$ -f $ARG3$"; + check_backup_eriomem = "$USER2$/check_eriomem $USER208$"; + check_backup_eriomem_age = "$USER2$/check_backup_eriomem_age $ARG1$"; + check_backup_ovh_age = "$USER2$/check_backup_ovh_age $ARG1$"; + check_external_dns = "$USER1$/check_dns -H $ARG2$ -s $ARG1$ $ARG3$"; + check_ftp_database = "$USER2$/check_ftp_database"; + check_git = "$USER2$/check_git $USER203$"; + check_http = "$USER1$/check_http --sni -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\""; + check_https = "$USER1$/check_http --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\""; + check_https_auth = "$USER1$/check_http --sni --ssl -a \"$USER202$\" -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\""; + check_https_certificate = "$USER1$/check_http --sni --ssl -H \"$ARG1$\" -C 21,15"; + check_https_code = "$USER1$/check_http --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -e \"$ARG3$\" -r \"$ARG4$\""; + check_imap_connection = "$USER2$/check_imap_connection -u \"$USER204$\" -p \"$USER205$\" -H \"imap.immae.eu:143\""; check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$"; check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$"; check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$"; check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$"; + check_local_bandwidth = "$USER2$/check_bandwidth -i=$ARG1$ -w $ARG2$ -c $ARG3$"; check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$"; + check_command_match = "$USER2$/check_command -c \"$ARG1$\" -C \"$ARG2$\" $ARG3$"; check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; + check_command_status = "$USER2$/check_command -c \"$ARG1$\" -s \"$ARG2$\" $ARG3$"; check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org"; + check_mailq = "$USER1$/check_mailq -s -w 1 -c 2"; check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\""; check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\""; check_openldap_replication = "${sudo} -u openldap $USER2$/check_openldap_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\" \"$ARG4$\" \"$ARG5$\""; + check_ovh_sms = "$USER2$/check_ovh_sms \"$USER209$\""; check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\""; - check_mailq = "$USER1$/check_mailq -s -w 1 -c 2"; + check_smtp = "$USER1$/check_smtp -H $HOSTADDRESS$ -p 25 -S -D 21,15"; + check_tcp = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -e \"$ARG2$\" -Mcrit"; + check_tcp_ssl = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -S -D 21,15"; check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5"; check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\""; + check_ok = "$USER1$/check_dummy 0 \"Dummy OK\""; + check_critical = "$USER1$/check_dummy 2 \"Dummy CRITICAL\""; - # No notify commands, we go through master - # notify_host_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; - # #$OVE is to force naemon to run via shell instead of execve which fails here - # notify_service_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; - # #sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email - # #$OVE is to force naemon to run via shell instead of execve which fails here + # $OVE is to force naemon to run via shell instead of execve which fails here + notify-host-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; + # $OVE is to force naemon to run via shell instead of execve which fails here + notify-service-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; + notify-by-slack = "HOST=\"$HOSTALIAS$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_slack \"$ARG1$\" \"$ARG2$\""; - notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\""; + notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$ | $SERVICEPERFDATA$\""; }; timeperiod = { "24x7" = { @@ -104,17 +169,24 @@ sunday = "00:00-24:00"; }; }; + servicegroup = { + webstatus-webapps = { alias = "Web applications"; }; + webstatus-websites = { alias = "Personal websites"; }; + webstatus-ssl = { alias = "SSL certificates"; }; + webstatus-dns = { alias = "DNS resolution"; }; + webstatus-remote-services = { alias = "Other remote services"; }; + webstatus-local-services = { alias = "Other local services"; }; + webstatus-email = { alias = "E-mail services"; }; + webstatus-resources = { alias = "Local resources"; }; + webstatus-databases = { alias = "Databases resources"; }; + webstatus-backup = { alias = "Backup resources"; }; + }; + hostgroup = { + webstatus-hosts = { alias = "Hosts"; }; + }; contactgroup = { admins = { alias = "Naemon Administrators"; }; }; - # No contact, we go through master - # contact = { - # immae = { - # alias = "Immae"; - # use = "generic-contact"; - # email = "xxxxxxxxxxxxxxxx"; - # }; - # }; templates = { service = { generic-service = { @@ -130,7 +202,7 @@ notification_interval = "60"; notification_options = "w,u,c,r,f,s"; notification_period = "24x7"; - notifications_enabled = "0"; # no notification since we send them to master + notifications_enabled = if master then "1" else "0"; obsess_over_service = "1"; passive_checks_enabled = "1"; process_perf_data = "1"; @@ -144,19 +216,51 @@ check_interval = "5"; max_check_attempts = "4"; retry_interval = "1"; + servicegroups = "webstatus-resources"; + }; + external-service = { + use = "generic-service"; + check_interval = "5"; + max_check_attempts = "4"; + retry_interval = "1"; + }; + web-service = { + use = "generic-service"; + check_interval = "20"; + max_check_attempts = "2"; + retry_interval = "1"; + }; + external-web-service = { + use = "generic-service"; + check_interval = "20"; + max_check_attempts = "2"; + retry_interval = "1"; + }; + mail-service = { + use = "generic-service"; + check_interval = "15"; + max_check_attempts = "1"; + retry_interval = "1"; + }; + dns-service = { + use = "generic-service"; + check_interval = "120"; + notification_interval = "120"; + max_check_attempts = "5"; + retry_interval = "5"; }; }; # No contact, we go through master - # contact = { - # generic-contact = { - # host_notification_commands = "notify_host_by_email"; - # host_notification_options = "d,u,r,f,s"; - # host_notification_period = "24x7"; - # service_notification_commands = "notify_service_by_email"; - # service_notification_options = "w,u,c,r,f,s"; - # service_notification_period = "24x7"; - # }; - # }; + contact = { + generic-contact = { + host_notification_commands = "notify-host-by-email"; + host_notification_options = "d,u,r,f,s"; + host_notification_period = "24x7"; + service_notification_commands = "notify-service-by-email"; + service_notification_options = "w,u,c,r,f,s"; + service_notification_period = "24x7"; + }; + }; host = { generic-host = { event_handler_enabled = "1";