diff options
author | Ismaël Bouya <ismael.bouya@normalesup.org> | 2019-12-02 01:33:08 +0100 |
---|---|---|
committer | Ismaël Bouya <ismael.bouya@normalesup.org> | 2019-12-02 01:33:08 +0100 |
commit | eb071dd42518cb40d629e5bde29c6aed72e4d4df (patch) | |
tree | f9a109801d540ef8fc4c1b0656436f560ad1f6b7 /modules/private/monitoring/objects_common.nix | |
parent | 9f2025235d888eb4a7822024a5fad2e288388814 (diff) | |
download | Nix-eb071dd42518cb40d629e5bde29c6aed72e4d4df.tar.gz Nix-eb071dd42518cb40d629e5bde29c6aed72e4d4df.tar.zst Nix-eb071dd42518cb40d629e5bde29c6aed72e4d4df.zip |
Use nix expressions to build monitoring list
Diffstat (limited to 'modules/private/monitoring/objects_common.nix')
-rw-r--r-- | modules/private/monitoring/objects_common.nix | 179 |
1 files changed, 179 insertions, 0 deletions
diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix new file mode 100644 index 0000000..8466fdb --- /dev/null +++ b/modules/private/monitoring/objects_common.nix | |||
@@ -0,0 +1,179 @@ | |||
# Function producing the monitoring object definitions (host, service, command,
# timeperiod, contactgroup and templates) for one machine.
# Arguments:
#   hostFQDN     - used as the host key, alias and address, and as host_name of
#                  the "local-service" template.
#   processWarn  - first argument of the check_local_procs service (default "250").
#   processAlert - second argument of the check_local_procs service (default "400").
#   loadWarn     - repeated three times, comma-separated, as first argument of the
#                  check_local_load service (default "8.0").
#   loadAlert    - same as loadWarn, for the second argument (default "10.0").
#   mdadm        - interpolated as "${mdadm}/bin/mdadm"; presumably the mdadm
#                  package or its store path - confirm with the caller.
#   sudo         - interpolated as-is at the start of two command lines;
#                  presumably the full path of a sudo executable - confirm with the caller.
#   ...          - any additional arguments are accepted and ignored.
1 | { hostFQDN | ||
2 | , processWarn ? "250" | ||
3 | , processAlert ? "400" | ||
4 | , loadWarn ? "8.0" | ||
5 | , loadAlert ? "10.0" | ||
6 | , mdadm | ||
7 | , sudo | ||
8 | , ... | ||
9 | }: | ||
10 | { | ||
# Host objects: a single entry keyed by hostFQDN, whose alias and address are
# both hostFQDN and which references the "linux-server" template defined under
# templates.host.
11 | host = { | ||
12 | "${hostFQDN}" = { | ||
13 | alias = hostFQDN; | ||
14 | address = hostFQDN; | ||
15 | use = "linux-server"; | ||
16 | }; | ||
17 | }; | ||
# Service checks for this host. Every entry references the "local-service"
# template (templates.service) and gives check_command as a list whose first
# element names an entry of `command`; the remaining elements presumably fill
# $ARG1$, $ARG2$, ... of that entry - confirm against the code that renders
# these lists.
18 | service = [ | ||
19 | { | ||
# check_local_disk places its three arguments after check_disk's -w, -c and -p.
20 | service_description = "Size on root partition"; | ||
21 | use = "local-service"; | ||
22 | check_command = ["check_local_disk" "20%" "10%" "/"]; | ||
23 | } | ||
24 | { | ||
# processWarn / processAlert go after check_procs' -w / -c, "RSZDT" after -s.
25 | service_description = "Total number of process"; | ||
26 | use = "local-service"; | ||
27 | check_command = [ | ||
28 | "check_local_procs" | ||
29 | processWarn | ||
30 | processAlert | ||
31 | "RSZDT" | ||
32 | ]; | ||
33 | } | ||
34 | { | ||
# Each threshold is repeated three times, comma-separated: loadWarn for
# check_load's -w and loadAlert for its -c.
35 | service_description = "Average load"; | ||
36 | use = "local-service"; | ||
37 | check_command = [ | ||
38 | "check_local_load" | ||
39 | "${loadWarn},${loadWarn},${loadWarn}" | ||
40 | "${loadAlert},${loadAlert},${loadAlert}" | ||
41 | ]; | ||
42 | } | ||
43 | { | ||
# "20" goes after check_swap's -w and "10" after its -c.
44 | service_description = "Swap usage"; | ||
45 | use = "local-service"; | ||
46 | check_command = ["check_local_swap" "20" "10"]; | ||
47 | } | ||
48 | { | ||
# "80" goes after check_mem.sh's -w and "90" after its -c.
49 | service_description = "Memory usage"; | ||
50 | use = "local-service"; | ||
51 | check_command = ["check_memory" "80" "90"]; | ||
52 | } | ||
53 | { | ||
# check_ntp takes no arguments; the queried server is fixed in the command
# definition.
54 | service_description = "NTP is activated and working"; | ||
55 | use = "local-service"; | ||
56 | check_command = ["check_ntp"]; | ||
57 | } | ||
58 | { | ||
# Runs mdadm through check_command_output: the first argument is the command
# line, the second ("^$") is placed after -o, and the third is appended verbatim.
# NOTE(review): the check_command_output definition already contains "-s 0",
# so that option appears twice once the third argument is expanded - confirm
# this is intended.
59 | service_description = "No mdadm array is degraded"; | ||
60 | use = "local-service"; | ||
61 | check_command = [ | ||
62 | "check_command_output" | ||
63 | "${mdadm}/bin/mdadm --monitor --scan -1" | ||
64 | "^$" | ||
65 | "-s 0 -r root" | ||
66 | ]; | ||
67 | } | ||
68 | ]; | ||
# Command definitions: each attribute maps a command name to the command line
# it runs. $USER1$, $USER2$, $USER200$ and $USER201$ are macros whose values
# are not set in this file; $ARGn$ are filled from the check_command lists of
# the services that use the command.
69 | command = { | ||
70 | check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$"; | ||
71 | check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$"; | ||
72 | check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$"; | ||
73 | check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$"; | ||
74 | check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$"; | ||
# $ARG3$ is appended unquoted, so it can carry several extra options.
75 | check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; | ||
76 | check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org"; | ||
# check_postgresql_replication and check_mailq are not referenced by any service
# in this file; presumably they are used by host-specific definitions elsewhere.
# NOTE(review): "${sudo}" is used as the command word itself (unlike
# "${mdadm}/bin/mdadm"), so the argument must already be an executable path.
77 | check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\""; | ||
78 | check_mailq = "$USER1$/check_mailq -s -w 1 -c 2"; | ||
79 | |||
# check_host_alive is the check_command of the "linux-server" host template.
80 | check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5"; | ||
# check_last_file_date runs as the user given in $ARG3$; it is not referenced
# by any service in this file.
81 | check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\""; | ||
82 | |||
83 | # No notify commands, we go through master | ||
84 | # notify_host_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; | ||
85 | # #$OVE is to force naemon to run via shell instead of execve which fails here | ||
86 | # notify_service_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; | ||
87 | # #sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email | ||
88 | # #$OVE is to force naemon to run via shell instead of execve which fails here | ||
89 | |||
# notify-master passes the host address, service description, service state id
# and service output to send_nrdp.sh, together with $USER200$ (-u) and
# $USER201$ (-t).
90 | notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\""; | ||
91 | }; | ||
# Time periods: a single "24x7" period covering 00:00-24:00 on every day of the
# week. It is the check_period / notification_period of the templates in this file.
92 | timeperiod = { | ||
93 | "24x7" = { | ||
94 | alias = "24 Hours A Day, 7 Days A Week"; | ||
95 | monday = "00:00-24:00"; | ||
96 | tuesday = "00:00-24:00"; | ||
97 | wednesday = "00:00-24:00"; | ||
98 | thursday = "00:00-24:00"; | ||
99 | friday = "00:00-24:00"; | ||
100 | saturday = "00:00-24:00"; | ||
101 | sunday = "00:00-24:00"; | ||
102 | }; | ||
103 | }; | ||
# Contact groups: only "admins", which the generic-service and linux-server
# templates name in contact_groups. No contact is defined in this file (the
# contact block is commented out).
104 | contactgroup = { | ||
105 | admins = { alias = "Naemon Administrators"; }; | ||
106 | }; | ||
107 | # No contact, we go through master | ||
108 | # contact = { | ||
109 | # immae = { | ||
110 | # alias = "Immae"; | ||
111 | # use = "generic-contact"; | ||
112 | # email = "xxxxxxxxxxxxxxxx"; | ||
113 | # }; | ||
114 | # }; | ||
# Templates referenced through `use` by the host and service objects of this file.
115 | templates = { | ||
116 | service = { | ||
# Base service settings: checks every 10 with retries every 2 and 3 attempts,
# always within the "24x7" period; notifications are disabled here.
117 | generic-service = { | ||
118 | active_checks_enabled = "1"; | ||
119 | check_freshness = "0"; | ||
120 | check_interval = "10"; | ||
121 | check_period = "24x7"; | ||
122 | contact_groups = "admins"; | ||
123 | event_handler_enabled = "1"; | ||
124 | flap_detection_enabled = "1"; | ||
125 | is_volatile = "0"; | ||
126 | max_check_attempts = "3"; | ||
127 | notification_interval = "60"; | ||
128 | notification_options = "w,u,c,r,f,s"; | ||
129 | notification_period = "24x7"; | ||
130 | notifications_enabled = "0"; # no notification since we send them to master | ||
131 | obsess_over_service = "1"; | ||
132 | passive_checks_enabled = "1"; | ||
133 | process_perf_data = "1"; | ||
134 | retain_nonstatus_information = "1"; | ||
135 | retain_status_information = "1"; | ||
136 | retry_interval = "2"; | ||
137 | }; | ||
# Template used by every service in this file: builds on generic-service, binds
# the service to hostFQDN and sets its own check_interval, max_check_attempts
# and retry_interval.
138 | local-service = { | ||
139 | use = "generic-service"; | ||
140 | host_name = hostFQDN; | ||
141 | check_interval = "5"; | ||
142 | max_check_attempts = "4"; | ||
143 | retry_interval = "1"; | ||
144 | }; | ||
145 | }; | ||
146 | # No contact, we go through master | ||
147 | # contact = { | ||
148 | # generic-contact = { | ||
149 | # host_notification_commands = "notify_host_by_email"; | ||
150 | # host_notification_options = "d,u,r,f,s"; | ||
151 | # host_notification_period = "24x7"; | ||
152 | # service_notification_commands = "notify_service_by_email"; | ||
153 | # service_notification_options = "w,u,c,r,f,s"; | ||
154 | # service_notification_period = "24x7"; | ||
155 | # }; | ||
156 | # }; | ||
157 | host = { | ||
# NOTE(review): generic-host sets notifications_enabled to "1" while
# generic-service sets it to "0" - confirm the difference is intended.
158 | generic-host = { | ||
159 | event_handler_enabled = "1"; | ||
160 | flap_detection_enabled = "1"; | ||
161 | notification_period = "24x7"; | ||
162 | notifications_enabled = "1"; | ||
163 | process_perf_data = "1"; | ||
164 | retain_nonstatus_information = "1"; | ||
165 | retain_status_information = "1"; | ||
166 | }; | ||
# Template used by the host entry of this file; checks the host with
# check_host_alive.
# NOTE(review): this template has no `use = "generic-host"` attribute, so
# nothing in this file links it to generic-host - confirm whether that link
# is added elsewhere or is missing.
167 | linux-server = { | ||
168 | check_command = "check_host_alive"; | ||
169 | check_interval = "5"; | ||
170 | check_period = "24x7"; | ||
171 | contact_groups = "admins"; | ||
172 | max_check_attempts = "10"; | ||
173 | notification_interval = "120"; | ||
174 | notification_options = "d,u,r,f"; | ||
175 | retry_interval = "1"; | ||
176 | }; | ||
177 | }; | ||
178 | }; | ||
179 | } | ||