about | summary | refs | log | tree | commit | diff
path: root/modules/private/monitoring/objects_common.nix
diff options
context:
space:
mode:
author: Ismaël Bouya <ismael.bouya@normalesup.org> 2020-01-05 17:08:32 +0100
committer: Ismaël Bouya <ismael.bouya@normalesup.org> 2020-01-05 17:08:32 +0100
commit: e820134d38c3b7470ea5112f40a6dc967f039878 (patch)
tree: f05a5cefe285d060aa0ebf52829bcfcd35549f8b /modules/private/monitoring/objects_common.nix
parent: b22ce4895ef1e9723a02061f7293e528cfbf9754 (diff)
download: Nix-e820134d38c3b7470ea5112f40a6dc967f039878.tar.gz
Nix-e820134d38c3b7470ea5112f40a6dc967f039878.tar.zst
Nix-e820134d38c3b7470ea5112f40a6dc967f039878.zip
Add monitoring host
Diffstat (limited to 'modules/private/monitoring/objects_common.nix')
-rw-r--r-- modules/private/monitoring/objects_common.nix 132
1 files changed, 106 insertions, 26 deletions
diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix
index 66fb812..7467306 100644
--- a/modules/private/monitoring/objects_common.nix
+++ b/modules/private/monitoring/objects_common.nix
@@ -1,27 +1,45 @@
1{ hostFQDN 1{ hostFQDN
2, hostName
2, processWarn ? "250" 3, processWarn ? "250"
3, processAlert ? "400" 4, processAlert ? "400"
4, loadWarn ? "8.0" 5, loadWarn ? "8.0"
5, loadAlert ? "10.0" 6, loadAlert ? "10.0"
6, mdadm 7, mdadm
7, sudo 8, sudo
9, master
10, lib
8, ... 11, ...
9}: 12}:
13let
14 defaultPassiveInfo = {
15 filter = lib.attrsets.filterAttrs
16 (k: v: builtins.elem k ["service_description"] || builtins.substring 0 1 k == "_");
17 use = "external-passive-service";
18 freshness_threshold = "450";
19 retry_interval = "1";
20 servicegroups = "webstatus-resources";
21 host_name = hostFQDN;
22 };
23in
10{ 24{
11 host = { 25 host = {
12 "${hostFQDN}" = { 26 "${hostFQDN}" = {
13 alias = hostFQDN; 27 alias = hostFQDN;
14 address = hostFQDN; 28 address = hostFQDN;
15 use = "linux-server"; 29 use = "linux-server";
30 hostgroups = "webstatus-hosts";
31 _webstatus_name = hostName;
16 }; 32 };
17 }; 33 };
18 service = [ 34 service = [
19 { 35 {
36 passiveInfo = defaultPassiveInfo;
20 service_description = "Size on root partition"; 37 service_description = "Size on root partition";
21 use = "local-service"; 38 use = "local-service";
22 check_command = ["check_local_disk" "20%" "10%" "/"]; 39 check_command = ["check_local_disk" "20%" "10%" "/"];
23 } 40 }
24 { 41 {
42 passiveInfo = defaultPassiveInfo;
25 service_description = "Total number of process"; 43 service_description = "Total number of process";
26 use = "local-service"; 44 use = "local-service";
27 check_command = [ 45 check_command = [
@@ -32,6 +50,7 @@
32 ]; 50 ];
33 } 51 }
34 { 52 {
53 passiveInfo = defaultPassiveInfo;
35 service_description = "Average load"; 54 service_description = "Average load";
36 use = "local-service"; 55 use = "local-service";
37 check_command = [ 56 check_command = [
@@ -41,21 +60,25 @@
41 ]; 60 ];
42 } 61 }
43 { 62 {
63 passiveInfo = defaultPassiveInfo;
44 service_description = "Swap usage"; 64 service_description = "Swap usage";
45 use = "local-service"; 65 use = "local-service";
46 check_command = ["check_local_swap" "20" "10"]; 66 check_command = ["check_local_swap" "20" "10"];
47 } 67 }
48 { 68 {
69 passiveInfo = defaultPassiveInfo;
49 service_description = "Memory usage"; 70 service_description = "Memory usage";
50 use = "local-service"; 71 use = "local-service";
51 check_command = ["check_memory" "80" "90"]; 72 check_command = ["check_memory" "80" "90"];
52 } 73 }
53 { 74 {
75 passiveInfo = defaultPassiveInfo;
54 service_description = "NTP is activated and working"; 76 service_description = "NTP is activated and working";
55 use = "local-service"; 77 use = "local-service";
56 check_command = ["check_ntp"]; 78 check_command = ["check_ntp"];
57 } 79 }
58 { 80 {
81 passiveInfo = defaultPassiveInfo;
59 service_description = "No mdadm array is degraded"; 82 service_description = "No mdadm array is degraded";
60 use = "local-service"; 83 use = "local-service";
61 check_command = [ 84 check_command = [
@@ -67,28 +90,46 @@
67 } 90 }
68 ]; 91 ];
69 command = { 92 command = {
93 check_dns = "$USER1$/check_dns -H $ARG1$ -s $HOSTADDRESS$ $ARG2$";
94 check_external_dns = "$USER1$/check_dns -H $ARG2$ -s $ARG1$ $ARG3$";
95 check_ftp_database = "$USER2$/check_ftp_database";
96 check_git = "$USER2$/check_git $USER203$";
97 check_http = "$USER1$/check_http --sni -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\"";
98 check_https = "$USER1$/check_http --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\"";
99 check_https_auth = "$USER1$/check_http --sni --ssl -a \"$USER202$\" -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -r \"$ARG3$\"";
100 check_https_certificate = "$USER1$/check_http --sni --ssl -H \"$ARG1$\" -C 21,15";
101 check_https_code = "$USER1$/check_http --sni --ssl -f stickyport -H \"$ARG1$\" -u \"$ARG2$\" -e \"$ARG3$\" -r \"$ARG4$\"";
102 check_imap_connection = "$USER2$/check_imap_connection -u \"$USER204$\" -p \"$USER205$\" -H \"imap.immae.eu:143\"";
70 check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$"; 103 check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$";
71 check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$"; 104 check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$";
72 check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$"; 105 check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$";
73 check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$"; 106 check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$";
74 check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$"; 107 check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
108 check_command_match = "$USER2$/check_command -c \"$ARG1$\" -C \"$ARG2$\" $ARG3$";
75 check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; 109 check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";
110 check_command_status = "$USER2$/check_command -c \"$ARG1$\" -s \"$ARG2$\" $ARG3$";
76 check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org"; 111 check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org";
112 check_mailq = "$USER1$/check_mailq -s -w 1 -c 2";
113 check_maison_bbc = "$USER2$/check_maison_bbc";
77 check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\""; 114 check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\"";
78 check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\""; 115 check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\"";
79 check_openldap_replication = "${sudo} -u openldap $USER2$/check_openldap_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\" \"$ARG4$\" \"$ARG5$\""; 116 check_openldap_replication = "${sudo} -u openldap $USER2$/check_openldap_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\" \"$ARG4$\" \"$ARG5$\"";
80 check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\""; 117 check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\"";
81 check_mailq = "$USER1$/check_mailq -s -w 1 -c 2"; 118 check_smtp = "$USER1$/check_smtp -H $HOSTADDRESS$ -p 25 -S -D 21,15";
119 check_tcp = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -e \"$ARG2$\" -Mcrit";
120 check_tcp_ssl = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -S -D 21,15";
82 121
83 check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5"; 122 check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5";
84 check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\""; 123 check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\"";
124 check_ok = "$USER1$/check_dummy 0 \"Dummy OK\"";
125 check_critical = "$USER1$/check_dummy 2 \"Dummy CRITICAL\"";
85 126
86 # No notify commands, we go through master 127 # $OVE is to force naemon to run via shell instead of execve which fails here
87 # notify_host_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; 128 notify-host-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
88 # #$OVE is to force naemon to run via shell instead of execve which fails here 129 # $OVE is to force naemon to run via shell instead of execve which fails here
89 # notify_service_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; 130 notify-service-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
90 # #sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email 131 notify-maison-bbc-by-email = "ADMINEMAIL=\"$ADMINEMAIL$\" SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_maison_bbc_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
91 # #$OVE is to force naemon to run via shell instead of execve which fails here 132 notify-by-slack = "HOST=\"$HOSTALIAS$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_slack \"$ARG1$\" \"$ARG2$\"";
92 133
93 notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\""; 134 notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\"";
94 }; 135 };
@@ -104,17 +145,24 @@
104 sunday = "00:00-24:00"; 145 sunday = "00:00-24:00";
105 }; 146 };
106 }; 147 };
148 servicegroup = {
149 webstatus-webapps = { alias = "Web applications"; };
150 webstatus-websites = { alias = "Personal websites"; };
151 webstatus-ssl = { alias = "SSL certificates"; };
152 webstatus-dns = { alias = "DNS resolution"; };
153 webstatus-remote-services = { alias = "Other remote services"; };
154 webstatus-local-services = { alias = "Other local services"; };
155 webstatus-email = { alias = "E-mail services"; };
156 webstatus-resources = { alias = "Local resources"; };
157 webstatus-databases = { alias = "Databases resources"; };
158 webstatus-backup = { alias = "Backup resources"; };
159 };
160 hostgroup = {
161 webstatus-hosts = { alias = "Hosts"; };
162 };
107 contactgroup = { 163 contactgroup = {
108 admins = { alias = "Naemon Administrators"; }; 164 admins = { alias = "Naemon Administrators"; };
109 }; 165 };
110 # No contact, we go through master
111 # contact = {
112 # immae = {
113 # alias = "Immae";
114 # use = "generic-contact";
115 # email = "xxxxxxxxxxxxxxxx";
116 # };
117 # };
118 templates = { 166 templates = {
119 service = { 167 service = {
120 generic-service = { 168 generic-service = {
@@ -130,7 +178,7 @@
130 notification_interval = "60"; 178 notification_interval = "60";
131 notification_options = "w,u,c,r,f,s"; 179 notification_options = "w,u,c,r,f,s";
132 notification_period = "24x7"; 180 notification_period = "24x7";
133 notifications_enabled = "0"; # no notification since we send them to master 181 notifications_enabled = if master then "1" else "0";
134 obsess_over_service = "1"; 182 obsess_over_service = "1";
135 passive_checks_enabled = "1"; 183 passive_checks_enabled = "1";
136 process_perf_data = "1"; 184 process_perf_data = "1";
@@ -144,19 +192,51 @@
144 check_interval = "5"; 192 check_interval = "5";
145 max_check_attempts = "4"; 193 max_check_attempts = "4";
146 retry_interval = "1"; 194 retry_interval = "1";
195 servicegroups = "webstatus-resources";
196 };
197 external-service = {
198 use = "generic-service";
199 check_interval = "5";
200 max_check_attempts = "4";
201 retry_interval = "1";
202 };
203 web-service = {
204 use = "generic-service";
205 check_interval = "20";
206 max_check_attempts = "2";
207 retry_interval = "1";
208 };
209 external-web-service = {
210 use = "generic-service";
211 check_interval = "20";
212 max_check_attempts = "2";
213 retry_interval = "1";
214 };
215 mail-service = {
216 use = "generic-service";
217 check_interval = "15";
218 max_check_attempts = "1";
219 retry_interval = "1";
220 };
221 dns-service = {
222 use = "generic-service";
223 check_interval = "120";
224 notification_interval = "120";
225 max_check_attempts = "5";
226 retry_interval = "5";
147 }; 227 };
148 }; 228 };
149 # No contact, we go through master 229 # No contact, we go through master
150 # contact = { 230 contact = {
151 # generic-contact = { 231 generic-contact = {
152 # host_notification_commands = "notify_host_by_email"; 232 host_notification_commands = "notify-host-by-email";
153 # host_notification_options = "d,u,r,f,s"; 233 host_notification_options = "d,u,r,f,s";
154 # host_notification_period = "24x7"; 234 host_notification_period = "24x7";
155 # service_notification_commands = "notify_service_by_email"; 235 service_notification_commands = "notify-service-by-email";
156 # service_notification_options = "w,u,c,r,f,s"; 236 service_notification_options = "w,u,c,r,f,s";
157 # service_notification_period = "24x7"; 237 service_notification_period = "24x7";
158 # }; 238 };
159 # }; 239 };
160 host = { 240 host = {
161 generic-host = { 241 generic-host = {
162 event_handler_enabled = "1"; 242 event_handler_enabled = "1";