]> git.immae.eu Git - perso/Immae/Config/Nix.git/blame - modules/private/monitoring/objects_common.nix
Upgrade nixos to latest
[perso/Immae/Config/Nix.git] / modules / private / monitoring / objects_common.nix
CommitLineData
eb071dd4
IB
# Function from per-host settings to an attribute set of monitoring objects
# (host, service, command, timeperiod, contactgroup, templates).
#   hostFQDN      required; used as the host object's name, alias and address,
#                 and as host_name of the "local-service" template.
#   processWarn   default "250"; first threshold passed to check_local_procs.
#   processAlert  default "400"; second threshold passed to check_local_procs.
#   loadWarn      default "8.0"; repeated three times, comma-separated, as the
#                 first check_local_load argument.
#   loadAlert     default "10.0"; repeated three times, comma-separated, as the
#                 second check_local_load argument.
#   mdadm         required; interpolated as "${mdadm}/bin/mdadm" in the mdadm check.
#   sudo          required; interpolated as the leading word of the replication
#                 and check_last_file_date command lines.
#   ...           any further arguments are accepted and not used in this file.
1{ hostFQDN
2, processWarn ? "250"
3, processAlert ? "400"
4, loadWarn ? "8.0"
5, loadAlert ? "10.0"
6, mdadm
7, sudo
8, ...
9}:
10{
# Host object for this machine: hostFQDN serves as the object name, the alias
# and the address. Everything else comes from the "linux-server" host template
# defined under templates.host in this file.
11 host = {
12 "${hostFQDN}" = {
13 alias = hostFQDN;
14 address = hostFQDN;
15 use = "linux-server";
16 };
17 };
# Service checks for this host; each one uses the "local-service" template.
# Every check_command is a list whose first element matches a key of the
# `command` attribute set in this file. Presumably the remaining elements become
# $ARG1$, $ARG2$, ... when the list is rendered -- the rendering code is not in
# this file, TODO confirm.
18 service = [
19 {
20 service_description = "Size on root partition";
21 use = "local-service";
# Per the check_local_disk template: -w 20% -c 10% -p /
22 check_command = ["check_local_disk" "20%" "10%" "/"];
23 }
24 {
25 service_description = "Total number of process";
26 use = "local-service";
# Thresholds come from the processWarn / processAlert function arguments;
# "RSZDT" is handed to check_procs through its -s option.
27 check_command = [
28 "check_local_procs"
29 processWarn
30 processAlert
31 "RSZDT"
32 ];
33 }
34 {
35 service_description = "Average load";
36 use = "local-service";
# The same loadWarn / loadAlert value is repeated three times, comma-separated,
# to build the -w and -c arguments of check_load.
37 check_command = [
38 "check_local_load"
39 "${loadWarn},${loadWarn},${loadWarn}"
40 "${loadAlert},${loadAlert},${loadAlert}"
41 ];
42 }
43 {
44 service_description = "Swap usage";
45 use = "local-service";
46 check_command = ["check_local_swap" "20" "10"];
47 }
48 {
49 service_description = "Memory usage";
50 use = "local-service";
51 check_command = ["check_memory" "80" "90"];
52 }
53 {
54 service_description = "NTP is activated and working";
55 use = "local-service";
# No arguments: the check_ntp command line is fully hard-coded.
56 check_command = ["check_ntp"];
57 }
58 {
59 service_description = "No mdadm array is degraded";
60 use = "local-service";
# Runs `mdadm --monitor --scan -1` through check_command_output, with "^$"
# passed as the -o argument (presumably the check succeeds only when the
# command prints nothing -- confirm against the check_command script).
# NOTE(review): the check_command_output template already contains "-s 0", so
# the "-s 0" in the last element repeats it -- confirm this is intended.
61 check_command = [
62 "check_command_output"
63 "${mdadm}/bin/mdadm --monitor --scan -1"
64 "^$"
65 "-s 0 -r root"
66 ];
67 }
68 ];
# Command-line templates, keyed by command name.
# Only ${sudo} is a Nix interpolation. The $USER1$, $USER2$, $ARGn$,
# $HOSTADDRESS$ and similar tokens are plain text as far as Nix is concerned and
# are left in the strings as monitoring macros. \" is an escaped double quote
# inside the Nix string.
# Commands under $USER1$ here are check_disk, check_procs, check_load,
# check_swap, check_ntp_time, check_mailq and check_ping; commands under
# $USER2$ are the check_mem.sh, check_command, check_*_replication,
# check_last_file_date and send_nrdp.sh scripts.
69 command = {
70 check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$";
71 check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$";
72 check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$";
73 check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$";
74 check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
75 check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";
76 check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org";
# Replication checks: each runs through ${sudo} -u <user> with a fixed user
# (mysql, postgres, openldap, redis).
6015a3b5 77 check_mysql_replication = "${sudo} -u mysql $USER2$/check_mysql_replication \"$ARG1$\" \"$ARG2$\"";
eb071dd4 78 check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\"";
16b80abd 79 check_openldap_replication = "${sudo} -u openldap $USER2$/check_openldap_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\" \"$ARG4$\" \"$ARG5$\"";
6015a3b5 80 check_redis_replication = "${sudo} -u redis $USER2$/check_redis_replication \"$ARG1$\"";
eb071dd4
IB
81 check_mailq = "$USER1$/check_mailq -s -w 1 -c 2";
82
# Host liveness check used by the "linux-server" host template.
83 check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5";
# Unlike the replication checks, the sudo target user here is the third
# argument ($ARG3$) rather than a fixed name.
84 check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\"";
85
86 # No notify commands, we go through master
87 # notify_host_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
88 # #$OVE is to force naemon to run via shell instead of execve which fails here
89 # notify_service_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
90 # #sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email
91 # #$OVE is to force naemon to run via shell instead of execve which fails here
92
# Forwards a service result through send_nrdp.sh: $USER200$ is passed as -u,
# $USER201$ as -t, followed by host address, service description, state id
# and output.
93 notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\"";
94 };
# The only time period defined here: "24x7", covering 00:00-24:00 on each of
# the seven weekdays. The templates in this file reference it as check_period
# and notification_period.
95 timeperiod = {
96 "24x7" = {
97 alias = "24 Hours A Day, 7 Days A Week";
98 monday = "00:00-24:00";
99 tuesday = "00:00-24:00";
100 wednesday = "00:00-24:00";
101 thursday = "00:00-24:00";
102 friday = "00:00-24:00";
103 saturday = "00:00-24:00";
104 sunday = "00:00-24:00";
105 };
106 };
# Contact group "admins", referenced by contact_groups in the generic-service
# and linux-server templates of this file. Only an alias is set here; no
# members are listed.
107 contactgroup = {
108 admins = { alias = "Naemon Administrators"; };
109 };
110 # No contact, we go through master
111 # contact = {
112 # immae = {
113 # alias = "Immae";
114 # use = "generic-contact";
115 # email = "xxxxxxxxxxxxxxxx";
116 # };
117 # };
# Object templates, grouped by object type (service, host). Objects and other
# templates pull them in with `use = "<template name>"`.
118 templates = {
119 service = {
# Base service template; all values are strings.
120 generic-service = {
121 active_checks_enabled = "1";
122 check_freshness = "0";
123 check_interval = "10";
124 check_period = "24x7";
125 contact_groups = "admins";
126 event_handler_enabled = "1";
127 flap_detection_enabled = "1";
128 is_volatile = "0";
129 max_check_attempts = "3";
130 notification_interval = "60";
131 notification_options = "w,u,c,r,f,s";
132 notification_period = "24x7";
133 notifications_enabled = "0"; # no notification since we send them to master
134 obsess_over_service = "1";
135 passive_checks_enabled = "1";
136 process_perf_data = "1";
137 retain_nonstatus_information = "1";
138 retain_status_information = "1";
139 retry_interval = "2";
140 };
# Template used by every entry of `service` in this file: builds on
# generic-service, pins host_name to hostFQDN and overrides check_interval
# (10 -> 5), max_check_attempts (3 -> 4) and retry_interval (2 -> 1).
141 local-service = {
142 use = "generic-service";
143 host_name = hostFQDN;
144 check_interval = "5";
145 max_check_attempts = "4";
146 retry_interval = "1";
147 };
148 };
149 # No contact, we go through master
150 # contact = {
151 # generic-contact = {
152 # host_notification_commands = "notify_host_by_email";
153 # host_notification_options = "d,u,r,f,s";
154 # host_notification_period = "24x7";
155 # service_notification_commands = "notify_service_by_email";
156 # service_notification_options = "w,u,c,r,f,s";
157 # service_notification_period = "24x7";
158 # };
159 # };
160 host = {
# NOTE(review): notifications_enabled is "1" here while generic-service sets it
# to "0" ("we send them to master") -- confirm the difference is intended.
161 generic-host = {
162 event_handler_enabled = "1";
163 flap_detection_enabled = "1";
164 notification_period = "24x7";
165 notifications_enabled = "1";
166 process_perf_data = "1";
167 retain_nonstatus_information = "1";
168 retain_status_information = "1";
169 };
# Template used by the host object of this file; the host is checked with
# check_host_alive.
# NOTE(review): unlike local-service -> generic-service, this template has no
# `use = "generic-host"`, so nothing visible in this file references
# generic-host -- confirm whether the inheritance was left out on purpose.
170 linux-server = {
171 check_command = "check_host_alive";
172 check_interval = "5";
173 check_period = "24x7";
174 contact_groups = "admins";
175 max_check_attempts = "10";
176 notification_interval = "120";
177 notification_options = "d,u,r,f";
178 retry_interval = "1";
179 };
180 };
181 };
182}