]> git.immae.eu Git - perso/Immae/Config/Nix.git/blame - modules/private/monitoring/objects_common.nix
Add mysql and redis monitoring
[perso/Immae/Config/Nix.git] / modules / private / monitoring / objects_common.nix
CommitLineData
eb071dd4
IB
# Common Naemon monitoring objects (host, services, commands, time periods,
# contact groups and templates) for a single monitored machine.
#
# Arguments:
#   hostFQDN     - name, alias and address of the host object; also the
#                  host_name of every "local-service".
#   processWarn  - warning threshold handed to check_procs (default "250").
#   processAlert - critical threshold handed to check_procs (default "400").
#   loadWarn     - warning load value, applied to all three load averages
#                  (default "8.0").
#   loadAlert    - critical load value, applied to all three load averages
#                  (default "10.0").
#   mdadm        - interpolated as "${mdadm}/bin/mdadm"
#                  (presumably the mdadm package; confirm against caller).
#   sudo         - interpolated as the sudo executable in front of the
#                  replication / file-date checks.
#   ...          - any extra arguments are accepted and ignored.
#
# Returns an attribute set with the keys host, service, command, timeperiod,
# contactgroup and templates.
#
# NOTE(review): $USER1$ / $USER2$ are resource macros defined outside this
# file; presumably the stock plugin directory and a custom plugin directory.
{ hostFQDN
, processWarn ? "250"
, processAlert ? "400"
, loadWarn ? "8.0"
, loadAlert ? "10.0"
, mdadm
, sudo
, ...
}:
{
  # The monitored machine itself.
  host = {
    "${hostFQDN}" = {
      alias = hostFQDN;
      address = hostFQDN;
      use = "linux-server";
    };
  };

  # Checks run locally on the host. Each check_command is a list whose first
  # element is a key of `command` below and whose remaining elements become
  # $ARG1$, $ARG2$, ...
  service = [
    {
      service_description = "Size on root partition";
      use = "local-service";
      check_command = ["check_local_disk" "20%" "10%" "/"];
    }
    {
      service_description = "Total number of process";
      use = "local-service";
      check_command = [
        "check_local_procs"
        processWarn
        processAlert
        "RSZDT" # process state flags passed to check_procs -s
      ];
    }
    {
      service_description = "Average load";
      use = "local-service";
      check_command = [
        "check_local_load"
        # The same threshold is used for the 1, 5 and 15 minute averages.
        "${loadWarn},${loadWarn},${loadWarn}"
        "${loadAlert},${loadAlert},${loadAlert}"
      ];
    }
    {
      service_description = "Swap usage";
      use = "local-service";
      # Thresholds are percentages of FREE swap. The "%" suffix is required:
      # check_swap interprets a bare integer as a number of free bytes, which
      # would make the check effectively never trigger.
      check_command = ["check_local_swap" "20%" "10%"];
    }
    {
      service_description = "Memory usage";
      use = "local-service";
      check_command = ["check_memory" "80" "90"];
    }
    {
      service_description = "NTP is activated and working";
      use = "local-service";
      check_command = ["check_ntp"];
    }
    {
      service_description = "No mdadm array is degraded";
      use = "local-service";
      check_command = [
        "check_command_output"
        "${mdadm}/bin/mdadm --monitor --scan -1"
        # Expected output pattern: empty (presumably matched by the -o option
        # of the custom check_command script; confirm).
        "^$"
        # NOTE(review): "-s 0" is also hard-coded in the check_command_output
        # template below, so it ends up on the command line twice.
        "-s 0 -r root"
      ];
    }
  ];

  # Command templates referenced by the check_command lists above.
  command = {
    check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$";
    check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$";
    check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$";
    check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$";
    check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
    check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";
    check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org";
    check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\"";
    check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\"";
    check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\"";
    check_mailq = "$USER1$/check_mailq -s -w 1 -c 2";

    check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5";
    # $ARG3$ is the user the check is run as through sudo.
    check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\"";

    # No notify commands, we go through master
    # notify_host_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
    # #$OVE is to force naemon to run via shell instead of execve which fails here
    # notify_service_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
    # #sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email
    # #$OVE is to force naemon to run via shell instead of execve which fails here

    # Forwards a service result to the master through send_nrdp.sh.
    notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\"";
  };

  timeperiod = {
    "24x7" = {
      alias = "24 Hours A Day, 7 Days A Week";
      monday = "00:00-24:00";
      tuesday = "00:00-24:00";
      wednesday = "00:00-24:00";
      thursday = "00:00-24:00";
      friday = "00:00-24:00";
      saturday = "00:00-24:00";
      sunday = "00:00-24:00";
    };
  };

  contactgroup = {
    admins = { alias = "Naemon Administrators"; };
  };

  # No contact, we go through master
  # contact = {
  #   immae = {
  #     alias = "Immae";
  #     use = "generic-contact";
  #     email = "xxxxxxxxxxxxxxxx";
  #   };
  # };

  # Object templates, referenced through `use`.
  templates = {
    service = {
      generic-service = {
        active_checks_enabled = "1";
        check_freshness = "0";
        check_interval = "10";
        check_period = "24x7";
        contact_groups = "admins";
        event_handler_enabled = "1";
        flap_detection_enabled = "1";
        is_volatile = "0";
        max_check_attempts = "3";
        notification_interval = "60";
        notification_options = "w,u,c,r,f,s";
        notification_period = "24x7";
        notifications_enabled = "0"; # no notification since we send them to master
        obsess_over_service = "1";
        passive_checks_enabled = "1";
        process_perf_data = "1";
        retain_nonstatus_information = "1";
        retain_status_information = "1";
        retry_interval = "2";
      };
      # generic-service bound to this host, with tighter check timings.
      local-service = {
        use = "generic-service";
        host_name = hostFQDN;
        check_interval = "5";
        max_check_attempts = "4";
        retry_interval = "1";
      };
    };

    # No contact, we go through master
    # contact = {
    #   generic-contact = {
    #     host_notification_commands = "notify_host_by_email";
    #     host_notification_options = "d,u,r,f,s";
    #     host_notification_period = "24x7";
    #     service_notification_commands = "notify_service_by_email";
    #     service_notification_options = "w,u,c,r,f,s";
    #     service_notification_period = "24x7";
    #   };
    # };

    host = {
      generic-host = {
        event_handler_enabled = "1";
        flap_detection_enabled = "1";
        notification_period = "24x7";
        notifications_enabled = "1";
        process_perf_data = "1";
        retain_nonstatus_information = "1";
        retain_status_information = "1";
      };
      # NOTE(review): unlike local-service / generic-service, linux-server has
      # no `use = "generic-host"`, so nothing in this file makes it inherit
      # the generic-host settings. Confirm this is intended.
      linux-server = {
        check_command = "check_host_alive";
        check_interval = "5";
        check_period = "24x7";
        contact_groups = "admins";
        max_check_attempts = "10";
        notification_interval = "120";
        notification_options = "d,u,r,f";
        retry_interval = "1";
      };
    };
  };
}