diff options
author | Ismaël Bouya <ismael.bouya@normalesup.org> | 2020-01-05 17:08:32 +0100 |
---|---|---|
committer | Ismaël Bouya <ismael.bouya@normalesup.org> | 2020-01-05 17:08:32 +0100 |
commit | e820134d38c3b7470ea5112f40a6dc967f039878 (patch) | |
tree | f05a5cefe285d060aa0ebf52829bcfcd35549f8b /modules/private/monitoring/objects_common.nix | |
parent | b22ce4895ef1e9723a02061f7293e528cfbf9754 (diff) | |
download | Nix-e820134d38c3b7470ea5112f40a6dc967f039878.tar.gz Nix-e820134d38c3b7470ea5112f40a6dc967f039878.tar.zst Nix-e820134d38c3b7470ea5112f40a6dc967f039878.zip |
Add monitoring host
Diffstat (limited to 'modules/private/monitoring/objects_common.nix')
-rw-r--r-- | modules/private/monitoring/objects_common.nix | 132 |
1 files changed, 106 insertions, 26 deletions
diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix index 66fb812..7467306 100644 --- a/modules/private/monitoring/objects_common.nix +++ b/modules/private/monitoring/objects_common.nix | |||
@@ -1,27 +1,45 @@ | |||
1 | { hostFQDN | 1 | { hostFQDN |
2 | , hostName | ||
2 | , processWarn ? "250" | 3 | , processWarn ? "250" |
3 | , processAlert ? "400" | 4 | , processAlert ? "400" |
4 | , loadWarn ? "8.0" | 5 | , loadWarn ? "8.0" |
5 | , loadAlert ? "10.0" | 6 | , loadAlert ? "10.0" |
6 | , mdadm | 7 | , mdadm |
7 | , sudo | 8 | , sudo |
9 | , master | ||
10 | , lib | ||
8 | , ... | 11 | , ... |
9 | }: | 12 | }: |
13 | let | ||
14 | defaultPassiveInfo = { | ||
15 | filter = lib.attrsets.filterAttrs | ||
16 | (k: v: builtins.elem k ["service_description"] || builtins.substring 0 1 k == "_"); | ||
17 | use = "external-passive-service"; | ||
18 | freshness_threshold = "450"; | ||
19 | retry_interval = "1"; | ||
20 | servicegroups = "webstatus-resources"; | ||
21 | host_name = hostFQDN; | ||
22 | }; | ||
23 | in | ||
10 | { | 24 | { |
11 | host = { | 25 | host = { |
12 | "${hostFQDN}" = { | 26 | "${hostFQDN}" = { |
13 | alias = hostFQDN; | 27 | alias = hostFQDN; |
14 | address = hostFQDN; | 28 | address = hostFQDN; |
15 | use = "linux-server"; | 29 | use = "linux-server"; |
30 | hostgroups = "webstatus-hosts"; | ||
31 | _webstatus_name = hostName; | ||
16 | }; | 32 | }; |
17 | }; | 33 | }; |
18 | service = [ | 34 | service = [ |
19 | { | 35 | { |
36 | passiveInfo = defaultPassiveInfo; | ||
20 | service_description = "Size on root partition"; | 37 | service_description = "Size on root partition"; |
21 | use = "local-service"; | 38 | use = "local-service"; |
22 | check_command = ["check_local_disk" "20%" "10%" "/"]; | 39 | check_command = ["check_local_disk" "20%" "10%" "/"]; |
23 | } | 40 | } |
24 | { | 41 | { |
42 | passiveInfo = defaultPassiveInfo; | ||
25 | service_description = "Total number of process"; | 43 | service_description = "Total number of process"; |
26 | use = "local-service"; | 44 | use = "local-service"; |
27 | check_command = [ | 45 | check_command = [ |
@@ -32,6 +50,7 @@ | |||
32 | ]; | 50 | ]; |
33 | } | 51 | } |
34 | { | 52 | { |
53 | passiveInfo = defaultPassiveInfo; | ||
35 | service_description = "Average load"; | 54 | service_description = "Average load"; |
36 | use = "local-service"; | 55 | use = "local-service"; |
37 | check_command = [ | 56 | check_command = [ |
@@ -41,21 +60,25 @@ | |||
41 | ]; | 60 | ]; |
42 | } | 61 | } |
43 | { | 62 | { |
63 | passiveInfo = defaultPassiveInfo; | ||
44 | service_description = "Swap usage"; | 64 | service_description = "Swap usage"; |
45 | use = "local-service"; | 65 | use = "local-service"; |
46 | check_command = ["check_local_swap" "20" "10"]; | 66 | check_command = ["check_local_swap" "20" "10"]; |
47 | } | 67 | } |
48 | { | 68 | { |
69 | passiveInfo = defaultPassiveInfo; | ||
49 | service_description = "Memory usage"; | 70 | service_description = "Memory usage"; |
50 | use = "local-service"; | 71 | use = "local-service"; |
51 | check_command = ["check_memory" "80" "90"]; | 72 | check_command = ["check_memory" "80" "90"]; |
52 | } | 73 | } |
53 | { | 74 | { |
75 | passiveInfo = defaultPassiveInfo; | ||
54 | service_description = "NTP is activated and working"; | 76 | service_description = "NTP is activated and working"; |
55 | use = "local-service"; | 77 | use = "local-service"; |
56 | check_command = ["check_ntp"]; | 78 | check_command = ["check_ntp"]; |
57 | } | 79 | } |
58 | { | 80 | { |
81 | passiveInfo = defaultPassiveInfo; | ||
59 | service_description = "No mdadm array is degraded"; | 82 | service_description = "No mdadm array is degraded"; |
60 | use = "local-service"; | 83 | use = "local-service"; |
61 | check_command = [ | 84 | check_command = [ |
@@ -67,28 +90,46 @@ | |||
67 | } | 90 | } |
68 | ]; | 91 | ]; |
69 | command = { | 92 | command = { |
93 | check_dns = "$USER1$/check_dns -H $ARG1$ -s $HOSTADDRESS$ $ARG2$"; | ||
94 | check_external_dns = "$USER1$/check_dns -H $ARG2$ -s $ARG1$ $ARG3$"; | ||
95 | check_ftp_database = "$USER2$/check_ftp_database"; | ||
96 | check_git = "$USER2$/check_git $USER203$"; | ||
97 | check_http = "$USER1$/check_http --sni -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\""; | ||
98 | check_https = "$USER1$/check_http --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\""; | ||
99 | check_https_auth = "$USER1$/check_http --sni --ssl -a \"$USER202$\" -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\""; | ||
100 | check_https_certificate = "$USER1$/check_http --sni --ssl -H \"$ARG1$\" -C 21,15"; | ||
101 | check_https_code = "$USER1$/check_http --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -e \"$ARG3$\" -r \"$ARG4$\""; | ||
102 | check_imap_connection = "$USER2$/check_imap_connection -u \"$USER204$\" -p \"$USER205$\" -H \"imap.immae.eu:143\""; | ||
70 | check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$"; | 103 | check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$"; |
71 | check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$"; | 104 | check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$"; |
72 | check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$"; | 105 | check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$"; |
73 | check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$"; | 106 | check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$"; |
74 | check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$"; | 107 | check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$"; |
108 | check_command_match = "$USER2$/check_command -c \"$ARG1$\" -C \"$ARG2$\" $ARG3$"; | ||
75 | check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; | 109 | check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; |
110 | check_command_status = "$USER2$/check_command -c \"$ARG1$\" -s \"$ARG2$\" $ARG3$"; | ||
76 | check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org"; | 111 | check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org"; |
112 | check_mailq = "$USER1$/check_mailq -s -w 1 -c 2"; | ||
113 | check_maison_bbc = "$USER2$/check_maison_bbc"; | ||
77 | check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\""; | 114 | check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\""; |
78 | check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\""; | 115 | check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\""; |
79 | check_openldap_replication = "${sudo} -u openldap $USER2$/check_openldap_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\" \"$ARG4$\" \"$ARG5$\""; | 116 | check_openldap_replication = "${sudo} -u openldap $USER2$/check_openldap_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\" \"$ARG4$\" \"$ARG5$\""; |
80 | check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\""; | 117 | check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\""; |
81 | check_mailq = "$USER1$/check_mailq -s -w 1 -c 2"; | 118 | check_smtp = "$USER1$/check_smtp -H $HOSTADDRESS$ -p 25 -S -D 21,15"; |
119 | check_tcp = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -e \"$ARG2$\" -Mcrit"; | ||
120 | check_tcp_ssl = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -S -D 21,15"; | ||
82 | 121 | ||
83 | check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5"; | 122 | check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5"; |
84 | check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\""; | 123 | check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\""; |
124 | check_ok = "$USER1$/check_dummy 0 \"Dummy OK\""; | ||
125 | check_critical = "$USER1$/check_dummy 2 \"Dummy CRITICAL\""; | ||
85 | 126 | ||
86 | # No notify commands, we go through master | 127 | # $OVE is to force naemon to run via shell instead of execve which fails here |
87 | # notify_host_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; | 128 | notify-host-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; |
88 | # #$OVE is to force naemon to run via shell instead of execve which fails here | 129 | # $OVE is to force naemon to run via shell instead of execve which fails here |
89 | # notify_service_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; | 130 | notify-service-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; |
90 | # #sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email | 131 | notify-maison-bbc-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_maison_bbc_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; |
91 | # #$OVE is to force naemon to run via shell instead of execve which fails here | 132 | notify-by-slack = "HOST=\"$HOSTALIAS$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_slack \"$ARG1$\" \"$ARG2$\""; |
92 | 133 | ||
93 | notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\""; | 134 | notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\""; |
94 | }; | 135 | }; |
@@ -104,17 +145,24 @@ | |||
104 | sunday = "00:00-24:00"; | 145 | sunday = "00:00-24:00"; |
105 | }; | 146 | }; |
106 | }; | 147 | }; |
148 | servicegroup = { | ||
149 | webstatus-webapps = { alias = "Web applications"; }; | ||
150 | webstatus-websites = { alias = "Personal websites"; }; | ||
151 | webstatus-ssl = { alias = "SSL certificates"; }; | ||
152 | webstatus-dns = { alias = "DNS resolution"; }; | ||
153 | webstatus-remote-services = { alias = "Other remote services"; }; | ||
154 | webstatus-local-services = { alias = "Other local services"; }; | ||
155 | webstatus-email = { alias = "E-mail services"; }; | ||
156 | webstatus-resources = { alias = "Local resources"; }; | ||
157 | webstatus-databases = { alias = "Databases resources"; }; | ||
158 | webstatus-backup = { alias = "Backup resources"; }; | ||
159 | }; | ||
160 | hostgroup = { | ||
161 | webstatus-hosts = { alias = "Hosts"; }; | ||
162 | }; | ||
107 | contactgroup = { | 163 | contactgroup = { |
108 | admins = { alias = "Naemon Administrators"; }; | 164 | admins = { alias = "Naemon Administrators"; }; |
109 | }; | 165 | }; |
110 | # No contact, we go through master | ||
111 | # contact = { | ||
112 | # immae = { | ||
113 | # alias = "Immae"; | ||
114 | # use = "generic-contact"; | ||
115 | # email = "xxxxxxxxxxxxxxxx"; | ||
116 | # }; | ||
117 | # }; | ||
118 | templates = { | 166 | templates = { |
119 | service = { | 167 | service = { |
120 | generic-service = { | 168 | generic-service = { |
@@ -130,7 +178,7 @@ | |||
130 | notification_interval = "60"; | 178 | notification_interval = "60"; |
131 | notification_options = "w,u,c,r,f,s"; | 179 | notification_options = "w,u,c,r,f,s"; |
132 | notification_period = "24x7"; | 180 | notification_period = "24x7"; |
133 | notifications_enabled = "0"; # no notification since we send them to master | 181 | notifications_enabled = if master then "1" else "0"; |
134 | obsess_over_service = "1"; | 182 | obsess_over_service = "1"; |
135 | passive_checks_enabled = "1"; | 183 | passive_checks_enabled = "1"; |
136 | process_perf_data = "1"; | 184 | process_perf_data = "1"; |
@@ -144,19 +192,51 @@ | |||
144 | check_interval = "5"; | 192 | check_interval = "5"; |
145 | max_check_attempts = "4"; | 193 | max_check_attempts = "4"; |
146 | retry_interval = "1"; | 194 | retry_interval = "1"; |
195 | servicegroups = "webstatus-resources"; | ||
196 | }; | ||
197 | external-service = { | ||
198 | use = "generic-service"; | ||
199 | check_interval = "5"; | ||
200 | max_check_attempts = "4"; | ||
201 | retry_interval = "1"; | ||
202 | }; | ||
203 | web-service = { | ||
204 | use = "generic-service"; | ||
205 | check_interval = "20"; | ||
206 | max_check_attempts = "2"; | ||
207 | retry_interval = "1"; | ||
208 | }; | ||
209 | external-web-service = { | ||
210 | use = "generic-service"; | ||
211 | check_interval = "20"; | ||
212 | max_check_attempts = "2"; | ||
213 | retry_interval = "1"; | ||
214 | }; | ||
215 | mail-service = { | ||
216 | use = "generic-service"; | ||
217 | check_interval = "15"; | ||
218 | max_check_attempts = "1"; | ||
219 | retry_interval = "1"; | ||
220 | }; | ||
221 | dns-service = { | ||
222 | use = "generic-service"; | ||
223 | check_interval = "120"; | ||
224 | notification_interval = "120"; | ||
225 | max_check_attempts = "5"; | ||
226 | retry_interval = "5"; | ||
147 | }; | 227 | }; |
148 | }; | 228 | }; |
149 | # No contact, we go through master | 229 | # No contact, we go through master |
150 | # contact = { | 230 | contact = { |
151 | # generic-contact = { | 231 | generic-contact = { |
152 | # host_notification_commands = "notify_host_by_email"; | 232 | host_notification_commands = "notify-host-by-email"; |
153 | # host_notification_options = "d,u,r,f,s"; | 233 | host_notification_options = "d,u,r,f,s"; |
154 | # host_notification_period = "24x7"; | 234 | host_notification_period = "24x7"; |
155 | # service_notification_commands = "notify_service_by_email"; | 235 | service_notification_commands = "notify-service-by-email"; |
156 | # service_notification_options = "w,u,c,r,f,s"; | 236 | service_notification_options = "w,u,c,r,f,s"; |
157 | # service_notification_period = "24x7"; | 237 | service_notification_period = "24x7"; |
158 | # }; | 238 | }; |
159 | # }; | 239 | }; |
160 | host = { | 240 | host = { |
161 | generic-host = { | 241 | generic-host = { |
162 | event_handler_enabled = "1"; | 242 | event_handler_enabled = "1"; |