]> git.immae.eu Git - perso/Immae/Config/Nix.git/blame - modules/private/monitoring/objects_common.nix
Use nix expressions to build monitoring list
[perso/Immae/Config/Nix.git] / modules / private / monitoring / objects_common.nix
CommitLineData
eb071dd4
IB
# Monitoring objects shared by every monitored machine: the host itself, its
# local services, the check/notification commands, one time period, one
# contact group, and the host/service templates.
#
# Arguments:
#   hostFQDN      used as the host object's name, alias and address, and as
#                 the host_name of the "local-service" template.
#   processWarn   warning threshold handed to check_procs (-w), as a string.
#   processAlert  critical threshold handed to check_procs (-c), as a string.
#   loadWarn      warning load value; repeated three times, comma-separated,
#                 for check_load -w.
#   loadAlert     critical load value; repeated the same way for check_load -c.
#   mdadm         interpolated as "${mdadm}/bin/mdadm".
#   sudo          interpolated as the command word itself ("${sudo} -u ..."),
#                 i.e. it must render to the sudo executable, not to a prefix
#                 containing bin/.
#   ...           any other argument is accepted and ignored.
#
# Returns an attribute set with the keys: host, service, command, timeperiod,
# contactgroup and templates.
{ hostFQDN
, processWarn ? "250"
, processAlert ? "400"
, loadWarn ? "8.0"
, loadAlert ? "10.0"
, mdadm
, sudo
, ...
}:
{
  host = {
    "${hostFQDN}" = {
      alias = hostFQDN;
      address = hostFQDN;
      use = "linux-server";
    };
  };

  # Each check_command list is: command name followed by its $ARGn$ values.
  service = [
    {
      service_description = "Size on root partition";
      use = "local-service";
      check_command = ["check_local_disk" "20%" "10%" "/"];
    }
    {
      service_description = "Total number of process";
      use = "local-service";
      check_command = [
        "check_local_procs"
        processWarn
        processAlert
        "RSZDT"
      ];
    }
    {
      service_description = "Average load";
      use = "local-service";
      check_command = [
        "check_local_load"
        "${loadWarn},${loadWarn},${loadWarn}"
        "${loadAlert},${loadAlert},${loadAlert}"
      ];
    }
    {
      service_description = "Swap usage";
      use = "local-service";
      # check_swap reads a bare integer as a number of free *bytes*; the "%"
      # suffix makes these free-space percentages, like the root partition
      # check above.
      check_command = ["check_local_swap" "20%" "10%"];
    }
    {
      service_description = "Memory usage";
      use = "local-service";
      check_command = ["check_memory" "80" "90"];
    }
    {
      service_description = "NTP is activated and working";
      use = "local-service";
      check_command = ["check_ntp"];
    }
    {
      service_description = "No mdadm array is degraded";
      use = "local-service";
      check_command = [
        "check_command_output"
        "${mdadm}/bin/mdadm --monitor --scan -1"
        # Expected output pattern: an empty output.
        "^$"
        # NOTE(review): the check_command_output template below already passes
        # "-s 0", so it ends up twice on the command line - confirm harmless.
        "-s 0 -r root"
      ];
    }
  ];

  command = {
    check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$";
    check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$";
    check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$";
    check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$";
    check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$";
    check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$";
    check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org";
    check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\"";
    check_mailq = "$USER1$/check_mailq -s -w 1 -c 2";

    check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5";
    check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\"";

    # No notify commands, we go through master
    # notify_host_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
    # #$OVE is to force naemon to run via shell instead of execve which fails here
    # notify_service_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE";
    # #sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email
    # #$OVE is to force naemon to run via shell instead of execve which fails here

    notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\"";
  };

  timeperiod = {
    "24x7" = {
      alias = "24 Hours A Day, 7 Days A Week";
      monday = "00:00-24:00";
      tuesday = "00:00-24:00";
      wednesday = "00:00-24:00";
      thursday = "00:00-24:00";
      friday = "00:00-24:00";
      saturday = "00:00-24:00";
      sunday = "00:00-24:00";
    };
  };

  contactgroup = {
    admins = { alias = "Naemon Administrators"; };
  };

  # No contact, we go through master
  # contact = {
  #   immae = {
  #     alias = "Immae";
  #     use = "generic-contact";
  #     email = "xxxxxxxxxxxxxxxx";
  #   };
  # };

  templates = {
    service = {
      generic-service = {
        active_checks_enabled = "1";
        check_freshness = "0";
        check_interval = "10";
        check_period = "24x7";
        contact_groups = "admins";
        event_handler_enabled = "1";
        flap_detection_enabled = "1";
        is_volatile = "0";
        max_check_attempts = "3";
        notification_interval = "60";
        notification_options = "w,u,c,r,f,s";
        notification_period = "24x7";
        notifications_enabled = "0"; # no notification since we send them to master
        obsess_over_service = "1";
        passive_checks_enabled = "1";
        process_perf_data = "1";
        retain_nonstatus_information = "1";
        retain_status_information = "1";
        retry_interval = "2";
      };
      # Template used by every entry of the `service` list above; overrides
      # the intervals and attempts of generic-service and pins the host.
      local-service = {
        use = "generic-service";
        host_name = hostFQDN;
        check_interval = "5";
        max_check_attempts = "4";
        retry_interval = "1";
      };
    };

    # No contact, we go through master
    # contact = {
    #   generic-contact = {
    #     host_notification_commands = "notify_host_by_email";
    #     host_notification_options = "d,u,r,f,s";
    #     host_notification_period = "24x7";
    #     service_notification_commands = "notify_service_by_email";
    #     service_notification_options = "w,u,c,r,f,s";
    #     service_notification_period = "24x7";
    #   };
    # };

    host = {
      # NOTE(review): notifications_enabled is "1" here while the service
      # template disables notifications - confirm this is intended.
      generic-host = {
        event_handler_enabled = "1";
        flap_detection_enabled = "1";
        notification_period = "24x7";
        notifications_enabled = "1";
        process_perf_data = "1";
        retain_nonstatus_information = "1";
        retain_status_information = "1";
      };
      # NOTE(review): unlike local-service, this template has no
      # `use = "generic-host"` - confirm whether inheritance is expected.
      linux-server = {
        check_command = "check_host_alive";
        check_interval = "5";
        check_period = "24x7";
        contact_groups = "admins";
        max_check_attempts = "10";
        notification_interval = "120";
        notification_options = "d,u,r,f";
        retry_interval = "1";
      };
    };
  };
}