diff options
author | Ismaël Bouya <ismael.bouya@normalesup.org> | 2019-12-02 01:33:08 +0100 |
---|---|---|
committer | Ismaël Bouya <ismael.bouya@normalesup.org> | 2019-12-02 01:33:08 +0100 |
commit | eb071dd42518cb40d629e5bde29c6aed72e4d4df (patch) | |
tree | f9a109801d540ef8fc4c1b0656436f560ad1f6b7 /modules/private | |
parent | 9f2025235d888eb4a7822024a5fad2e288388814 (diff) | |
download | Nix-eb071dd42518cb40d629e5bde29c6aed72e4d4df.tar.gz Nix-eb071dd42518cb40d629e5bde29c6aed72e4d4df.tar.zst Nix-eb071dd42518cb40d629e5bde29c6aed72e4d4df.zip |
Use nix expressions to build monitoring list
Diffstat (limited to 'modules/private')
-rw-r--r-- | modules/private/monitoring/conf/contacts.cfg | 41 | ||||
-rw-r--r-- | modules/private/monitoring/conf/hosts.cfg | 32 | ||||
-rw-r--r-- | modules/private/monitoring/conf/local_services.cfg | 68 | ||||
-rw-r--r-- | modules/private/monitoring/conf/notify.cfg | 8 | ||||
-rw-r--r-- | modules/private/monitoring/conf/objects.cfg | 84 | ||||
-rw-r--r-- | modules/private/monitoring/conf/services.cfg | 27 | ||||
-rw-r--r-- | modules/private/monitoring/conf/specific_backup-2.cfg | 36 | ||||
-rw-r--r-- | modules/private/monitoring/conf/specific_eldiron.cfg | 29 | ||||
-rw-r--r-- | modules/private/monitoring/conf/timeperiods.cfg | 15 | ||||
-rw-r--r-- | modules/private/monitoring/default.nix | 56 | ||||
-rw-r--r-- | modules/private/monitoring/objects_backup-2.nix | 30 | ||||
-rw-r--r-- | modules/private/monitoring/objects_common.nix | 179 | ||||
-rw-r--r-- | modules/private/monitoring/objects_eldiron.nix | 15 | ||||
-rw-r--r-- | modules/private/monitoring/to_objects.nix | 67 |
14 files changed, 311 insertions, 376 deletions
diff --git a/modules/private/monitoring/conf/contacts.cfg b/modules/private/monitoring/conf/contacts.cfg deleted file mode 100644 index b6ea84d..0000000 --- a/modules/private/monitoring/conf/contacts.cfg +++ /dev/null | |||
@@ -1,41 +0,0 @@ | |||
1 | # vim: filetype=nagios | ||
2 | |||
3 | # CONTACT GROUPS | ||
4 | define contactgroup { | ||
5 | contactgroup_name admins | ||
6 | alias Naemon Administrators | ||
7 | # members immae | ||
8 | } | ||
9 | |||
10 | # No contact, we go through master | ||
11 | # define contact { | ||
12 | # contact_name immae | ||
13 | # alias Immae | ||
14 | # use generic-contact | ||
15 | # email xxxxxxxxxxxxxxxx | ||
16 | # } | ||
17 | # | ||
18 | # define contact { | ||
19 | # name generic-contact | ||
20 | # host_notification_commands notify-host-by-email | ||
21 | # host_notification_options d,u,r,f,s | ||
22 | # host_notification_period 24x7 | ||
23 | # register 0 | ||
24 | # service_notification_commands notify-service-by-email | ||
25 | # service_notification_options w,u,c,r,f,s | ||
26 | # service_notification_period 24x7 | ||
27 | # } | ||
28 | # | ||
29 | # define command { | ||
30 | # command_name notify-host-by-email | ||
31 | # command_line SERVICENOTIFICATIONID="$SERVICENOTIFICATIONID$" HOSTSTATE="$HOSTSTATE$" HOSTOUTPUT="$HOSTOUTPUT$" $USER2$/notify_by_email host "$NOTIFICATIONTYPE$" "$HOSTALIAS$" "$LONGDATETIME$" "$CONTACTEMAIL$" $OVE | ||
32 | # #$OVE is to force naemon to run via shell instead of execve which fails here | ||
33 | # } | ||
34 | # | ||
35 | # # 'notify-service-by-email' command definition | ||
36 | # define command { | ||
37 | # command_name notify-service-by-email | ||
38 | # command_line SERVICENOTIFICATIONID="$SERVICENOTIFICATIONID$" SERVICEDESC="$SERVICEDESC$" SERVICESTATE="$SERVICESTATE$" SERVICEOUTPUT="$SERVICEOUTPUT$" $USER2$/notify_by_email service "$NOTIFICATIONTYPE$" "$HOSTALIAS$" "$LONGDATETIME$" "$CONTACTEMAIL$" $OVE | ||
39 | # # command_line sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email | ||
40 | # #$OVE is to force naemon to run via shell instead of execve which fails here | ||
41 | # } | ||
diff --git a/modules/private/monitoring/conf/hosts.cfg b/modules/private/monitoring/conf/hosts.cfg deleted file mode 100644 index d903b0a..0000000 --- a/modules/private/monitoring/conf/hosts.cfg +++ /dev/null | |||
@@ -1,32 +0,0 @@ | |||
1 | # vim: filetype=nagios | ||
2 | |||
3 | define host { | ||
4 | name generic-host | ||
5 | event_handler_enabled 1 | ||
6 | flap_detection_enabled 1 | ||
7 | notification_period 24x7 | ||
8 | notifications_enabled 1 | ||
9 | process_perf_data 1 | ||
10 | register 0 | ||
11 | retain_nonstatus_information 1 | ||
12 | retain_status_information 1 | ||
13 | } | ||
14 | |||
15 | define host { | ||
16 | name linux-server | ||
17 | use generic-host | ||
18 | check_command check-host-alive | ||
19 | check_interval 5 | ||
20 | check_period 24x7 | ||
21 | contact_groups admins | ||
22 | max_check_attempts 10 | ||
23 | notification_interval 120 | ||
24 | notification_options d,u,r,f | ||
25 | register 0 | ||
26 | retry_interval 1 | ||
27 | } | ||
28 | |||
29 | define command { | ||
30 | command_name check-host-alive | ||
31 | command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5 | ||
32 | } | ||
diff --git a/modules/private/monitoring/conf/local_services.cfg b/modules/private/monitoring/conf/local_services.cfg deleted file mode 100644 index 56bc8f6..0000000 --- a/modules/private/monitoring/conf/local_services.cfg +++ /dev/null | |||
@@ -1,68 +0,0 @@ | |||
1 | # vim: filetype=nagios | ||
2 | |||
3 | # System usage | ||
4 | define service { | ||
5 | service_description Size on root partition | ||
6 | use local-service | ||
7 | check_command check_local_disk!20%!10%!/ | ||
8 | } | ||
9 | define command { | ||
10 | command_line $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$ | ||
11 | command_name check_local_disk | ||
12 | } | ||
13 | |||
14 | define service { | ||
15 | service_description Total number of process | ||
16 | use local-service | ||
17 | check_command check_local_procs!250!400!RSZDT | ||
18 | } | ||
19 | define command { | ||
20 | command_line $USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$ | ||
21 | command_name check_local_procs | ||
22 | } | ||
23 | |||
24 | define service { | ||
25 | service_description Average load | ||
26 | use local-service | ||
27 | check_command check_local_load!8.0,8.0,8.0!10.0,10.0,10.0 | ||
28 | } | ||
29 | define command { | ||
30 | command_line $USER1$/check_load -w $ARG1$ -c $ARG2$ | ||
31 | command_name check_local_load | ||
32 | } | ||
33 | |||
34 | define service { | ||
35 | service_description Swap usage | ||
36 | use local-service | ||
37 | check_command check_local_swap!20!10 | ||
38 | } | ||
39 | define command { | ||
40 | command_line $USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$ | ||
41 | command_name check_local_swap | ||
42 | } | ||
43 | |||
44 | define service { | ||
45 | service_description Memory usage | ||
46 | use local-service | ||
47 | check_command check_memory!80!90 | ||
48 | } | ||
49 | define command { | ||
50 | command_line $USER2$/check_mem.sh -w $ARG1$ -c $ARG2$ | ||
51 | command_name check_memory | ||
52 | } | ||
53 | |||
54 | define command { | ||
55 | command_line $USER2$/check_command -c "$ARG1$" -s 0 -o "$ARG2$" $ARG3$ | ||
56 | command_name check_command_output | ||
57 | } | ||
58 | |||
59 | # Network dependent local services | ||
60 | define service { | ||
61 | service_description NTP is activated and working | ||
62 | use local-service | ||
63 | check_command check_ntp | ||
64 | } | ||
65 | define command { | ||
66 | command_line $USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org | ||
67 | command_name check_ntp | ||
68 | } | ||
diff --git a/modules/private/monitoring/conf/notify.cfg b/modules/private/monitoring/conf/notify.cfg deleted file mode 100644 index 63b380d..0000000 --- a/modules/private/monitoring/conf/notify.cfg +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | # vim: filetype=nagios | ||
2 | |||
3 | define command { | ||
4 | command_line /etc/naemon/send_nrdp.sh -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$" | ||
5 | command_name notify-master | ||
6 | } | ||
7 | |||
8 | |||
diff --git a/modules/private/monitoring/conf/objects.cfg b/modules/private/monitoring/conf/objects.cfg deleted file mode 100644 index 653477f..0000000 --- a/modules/private/monitoring/conf/objects.cfg +++ /dev/null | |||
@@ -1,84 +0,0 @@ | |||
1 | # vim: filetype=nagios | ||
2 | |||
3 | define command { | ||
4 | command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5 | ||
5 | command_name check-host-alive | ||
6 | } | ||
7 | |||
8 | define command { | ||
9 | command_line $USER2$/check_md_raid | ||
10 | command_name check_md_raid | ||
11 | } | ||
12 | |||
13 | define command { | ||
14 | command_line $USER2$/check_command -c "$ARG1$" -o "$ARG2$" $ARG3$ | ||
15 | command_name check_command_output | ||
16 | } | ||
17 | |||
18 | |||
19 | define command { | ||
20 | command_line /usr/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$" | ||
21 | command_name check_postgresql_replication | ||
22 | } | ||
23 | |||
24 | define service { | ||
25 | ## --PUPPET_NAME-- (called '_naginator_name' in the manifest) Databases are present in postgresql | ||
26 | active_checks_enabled 1 | ||
27 | check_command check_command_output!psql -c 'select nspname from pg_catalog.pg_namespace'!public!-r postgres | ||
28 | check_freshness 0 | ||
29 | check_interval 5 | ||
30 | check_period 24x7 | ||
31 | contact_groups admins | ||
32 | event_handler_enabled 1 | ||
33 | flap_detection_enabled 1 | ||
34 | host_name caldance-1.v.immae.eu | ||
35 | is_volatile 0 | ||
36 | max_check_attempts 4 | ||
37 | notification_interval 60 | ||
38 | notification_options w,u,c,r | ||
39 | notification_period 24x7 | ||
40 | notifications_enabled 0 | ||
41 | obsess_over_service 1 | ||
42 | passive_checks_enabled 1 | ||
43 | process_perf_data 1 | ||
44 | retain_nonstatus_information 1 | ||
45 | retain_status_information 1 | ||
46 | retry_interval 1 | ||
47 | service_description Databases are present in postgresql | ||
48 | } | ||
49 | |||
50 | define command { | ||
51 | command_line $USER2$/check_last_file_date "$ARG1$" "$ARG2$" "$ARG3$" | ||
52 | command_name check_last_file_date | ||
53 | } | ||
54 | |||
55 | define command { | ||
56 | command_line $USER2$/check_date "$ARG1$" "$ARG2$" "$ARG3$" | ||
57 | command_name check_date | ||
58 | } | ||
59 | |||
60 | define service { | ||
61 | ## --PUPPET_NAME-- (called '_naginator_name' in the manifest) Postgresql replication for backup-1 is up to date | ||
62 | active_checks_enabled 1 | ||
63 | check_command check_postgresql_replication!backup-1!/run/postgresql!5432 | ||
64 | check_freshness 0 | ||
65 | check_interval 5 | ||
66 | check_period 24x7 | ||
67 | contact_groups admins | ||
68 | event_handler_enabled 1 | ||
69 | flap_detection_enabled 1 | ||
70 | host_name caldance-1.v.immae.eu | ||
71 | is_volatile 0 | ||
72 | max_check_attempts 4 | ||
73 | notification_interval 60 | ||
74 | notification_options w,u,c,r | ||
75 | notification_period 24x7 | ||
76 | notifications_enabled 0 | ||
77 | obsess_over_service 1 | ||
78 | passive_checks_enabled 1 | ||
79 | process_perf_data 1 | ||
80 | retain_nonstatus_information 1 | ||
81 | retain_status_information 1 | ||
82 | retry_interval 1 | ||
83 | service_description Postgresql replication for backup-1 is up to date | ||
84 | } | ||
diff --git a/modules/private/monitoring/conf/services.cfg b/modules/private/monitoring/conf/services.cfg deleted file mode 100644 index 0740dc7..0000000 --- a/modules/private/monitoring/conf/services.cfg +++ /dev/null | |||
@@ -1,27 +0,0 @@ | |||
1 | # vim: filetype=nagios | ||
2 | |||
3 | define service { | ||
4 | name generic-service | ||
5 | active_checks_enabled 1 | ||
6 | check_freshness 0 | ||
7 | check_interval 10 | ||
8 | check_period 24x7 | ||
9 | contact_groups admins | ||
10 | event_handler_enabled 1 | ||
11 | flap_detection_enabled 1 | ||
12 | is_volatile 0 | ||
13 | max_check_attempts 3 | ||
14 | notification_interval 60 | ||
15 | notification_options w,u,c,r,f | ||
16 | notification_period 24x7 | ||
17 | # no notification since we send them to master | ||
18 | notifications_enabled 0 | ||
19 | obsess_over_service 1 | ||
20 | passive_checks_enabled 1 | ||
21 | process_perf_data 1 | ||
22 | register 0 | ||
23 | retain_nonstatus_information 1 | ||
24 | retain_status_information 1 | ||
25 | retry_interval 2 | ||
26 | } | ||
27 | |||
diff --git a/modules/private/monitoring/conf/specific_backup-2.cfg b/modules/private/monitoring/conf/specific_backup-2.cfg deleted file mode 100644 index ff91322..0000000 --- a/modules/private/monitoring/conf/specific_backup-2.cfg +++ /dev/null | |||
@@ -1,36 +0,0 @@ | |||
1 | # vim: filetype=nagios | ||
2 | |||
3 | define service { | ||
4 | service_description Size on /backup2 partition | ||
5 | check_command check_local_disk!10%!5%!/backup2 | ||
6 | use local-service | ||
7 | } | ||
8 | |||
9 | define command { | ||
10 | command_line /run/wrappers/bin/sudo -u "$ARG3$" $USER2$/check_last_file_date "$ARG1$" "$ARG2$" | ||
11 | command_name check_last_file_date | ||
12 | } | ||
13 | |||
14 | define service { | ||
15 | service_description Last backup in /backup2/phare is not too old | ||
16 | check_command check_last_file_date!/backup2/phare!14!backup | ||
17 | use local-service | ||
18 | } | ||
19 | |||
20 | define service { | ||
21 | service_description Last backup in /backup2/immae_eu is not too old | ||
22 | check_command check_last_file_date!/backup2/immae_eu!14!backup | ||
23 | use local-service | ||
24 | } | ||
25 | |||
26 | define service { | ||
27 | service_description Last backup in /backup2/immae_fr is not too old | ||
28 | check_command check_last_file_date!/backup2/immae_fr!14!backup | ||
29 | use local-service | ||
30 | } | ||
31 | |||
32 | define service { | ||
33 | service_description Last postgresql dump in /backup2/eldiron/postgresql_backup is not too old | ||
34 | check_command check_last_file_date!/backup2/eldiron/postgresql_backup!7!postgres | ||
35 | use local-service | ||
36 | } | ||
diff --git a/modules/private/monitoring/conf/specific_eldiron.cfg b/modules/private/monitoring/conf/specific_eldiron.cfg deleted file mode 100644 index fd5a43d..0000000 --- a/modules/private/monitoring/conf/specific_eldiron.cfg +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | # vim: filetype=nagios | ||
2 | # | ||
3 | define command { | ||
4 | command_line /run/wrappers/bin/sudo -u postgres $USER2$/check_postgres_replication "$ARG1$" "$ARG2$" "$ARG3$" | ||
5 | command_name check_postgresql_replication | ||
6 | } | ||
7 | |||
8 | define service { | ||
9 | service_description Postgresql replication for backup-1 is up to date | ||
10 | check_command check_postgresql_replication!backup-1!/run/postgresql!5432 | ||
11 | use local-service | ||
12 | } | ||
13 | |||
14 | define service { | ||
15 | service_description Postgresql replication for backup-2 is up to date | ||
16 | check_command check_postgresql_replication!backup-2!/run/postgresql!5432 | ||
17 | use local-service | ||
18 | } | ||
19 | |||
20 | define service { | ||
21 | service_description mailq is empty | ||
22 | use local-service | ||
23 | check_command check_mailq | ||
24 | } | ||
25 | |||
26 | define command { | ||
27 | command_name check_mailq | ||
28 | command_line $USER1$/check_mailq -s -w 1 -c 2 | ||
29 | } | ||
diff --git a/modules/private/monitoring/conf/timeperiods.cfg b/modules/private/monitoring/conf/timeperiods.cfg deleted file mode 100644 index 5ffe4ca..0000000 --- a/modules/private/monitoring/conf/timeperiods.cfg +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | # vim: filetype=nagios | ||
2 | |||
3 | define timeperiod { | ||
4 | alias 24 Hours A Day, 7 Days A Week | ||
5 | friday 00:00-24:00 | ||
6 | monday 00:00-24:00 | ||
7 | saturday 00:00-24:00 | ||
8 | sunday 00:00-24:00 | ||
9 | thursday 00:00-24:00 | ||
10 | timeperiod_name 24x7 | ||
11 | tuesday 00:00-24:00 | ||
12 | wednesday 00:00-24:00 | ||
13 | } | ||
14 | |||
15 | |||
diff --git a/modules/private/monitoring/default.nix b/modules/private/monitoring/default.nix index 6062aba..0259cbf 100644 --- a/modules/private/monitoring/default.nix +++ b/modules/private/monitoring/default.nix | |||
@@ -17,42 +17,26 @@ let | |||
17 | pkgs.postgresql | 17 | pkgs.postgresql |
18 | ]} | 18 | ]} |
19 | ''; | 19 | ''; |
20 | defaultObjects = | 20 | toObjects = pkgs.callPackage ./to_objects.nix {}; |
21 | let specific_file = ./conf + "/specific_" + name + ".cfg"; | 21 | commonConfig = { |
22 | eldiron = { | ||
23 | processWarn = "250"; processAlert = "400"; | ||
24 | loadWarn = "8.0"; loadAlert = "10.0"; | ||
25 | }; | ||
26 | backup-2 = { | ||
27 | processWarn = "50"; processAlert = "60"; | ||
28 | loadWarn = "1.0"; loadAlert = "2.0"; | ||
29 | }; | ||
30 | }; | ||
31 | commonObjects = pkgs.callPackage ./objects_common.nix ({ | ||
32 | inherit hostFQDN; | ||
33 | sudo = "/run/wrappers/bin/sudo"; | ||
34 | } // builtins.getAttr name commonConfig); | ||
35 | hostObjects = | ||
36 | let | ||
37 | specific_file = ./. + "/objects_" + name + ".nix"; | ||
22 | in | 38 | in |
23 | builtins.readFile ./conf/local_services.cfg | 39 | lib.attrsets.optionalAttrs (builtins.pathExists specific_file) (pkgs.callPackage specific_file {}); |
24 | + builtins.readFile ./conf/timeperiods.cfg | ||
25 | + builtins.readFile ./conf/services.cfg | ||
26 | + builtins.readFile ./conf/contacts.cfg | ||
27 | + builtins.readFile ./conf/hosts.cfg | ||
28 | + '' | ||
29 | define command { | ||
30 | command_line ${myplugins}/send_nrdp.sh -u "$USER200$" -t "$USER201$" -H "$HOSTADDRESS$" -s "$SERVICEDESC$" -S "$SERVICESTATEID$" -o "$SERVICEOUTPUT$" | ||
31 | command_name notify-master | ||
32 | } | ||
33 | define service { | ||
34 | service_description No mdadm array is degraded | ||
35 | use local-service | ||
36 | check_command check_command_output!${pkgs.mdadm}/bin/mdadm --monitor --scan -1!^$!-s 0 -r root | ||
37 | } | ||
38 | |||
39 | define service { | ||
40 | name local-service | ||
41 | use generic-service | ||
42 | host_name ${hostFQDN} | ||
43 | check_interval 5 | ||
44 | max_check_attempts 4 | ||
45 | register 0 | ||
46 | retry_interval 1 | ||
47 | } | ||
48 | define host { | ||
49 | host_name ${hostFQDN} | ||
50 | alias ${hostFQDN} | ||
51 | address ${hostFQDN} | ||
52 | use linux-server | ||
53 | } | ||
54 | '' | ||
55 | + lib.strings.optionalString (builtins.pathExists specific_file) (builtins.readFile specific_file); | ||
56 | in | 40 | in |
57 | { | 41 | { |
58 | options = { | 42 | options = { |
@@ -122,7 +106,7 @@ in | |||
122 | $USER200$=${myconfig.env.monitoring.status_url} | 106 | $USER200$=${myconfig.env.monitoring.status_url} |
123 | $USER201$=${myconfig.env.monitoring.status_token} | 107 | $USER201$=${myconfig.env.monitoring.status_token} |
124 | ''; | 108 | ''; |
125 | objectDefs = defaultObjects; | 109 | objectDefs = toObjects commonObjects + toObjects hostObjects; |
126 | }; | 110 | }; |
127 | }; | 111 | }; |
128 | } | 112 | } |
diff --git a/modules/private/monitoring/objects_backup-2.nix b/modules/private/monitoring/objects_backup-2.nix new file mode 100644 index 0000000..b8ecb81 --- /dev/null +++ b/modules/private/monitoring/objects_backup-2.nix | |||
@@ -0,0 +1,30 @@ | |||
1 | { ... }: | ||
2 | { | ||
3 | service = [ | ||
4 | { | ||
5 | service_description = "Size on /backup2 partition"; | ||
6 | use = "local-service"; | ||
7 | check_command = ["check_local_disk" "10%" "5%" "/backup2"]; | ||
8 | } | ||
9 | { | ||
10 | service_description = "Last backup in /backup2/phare is not too old"; | ||
11 | use = "local-service"; | ||
12 | check_command = ["check_last_file_date" "/backup2/phare" "14" "backup"]; | ||
13 | } | ||
14 | { | ||
15 | service_description = "Last backup in /backup2/immae_eu is not too old"; | ||
16 | use = "local-service"; | ||
17 | check_command = ["check_last_file_date" "/backup2/immae_eu" "14" "backup"]; | ||
18 | } | ||
19 | { | ||
20 | service_description = "Last backup in /backup2/immae_fr is not too old"; | ||
21 | use = "local-service"; | ||
22 | check_command = ["check_last_file_date" "/backup2/immae_fr" "14" "backup"]; | ||
23 | } | ||
24 | { | ||
25 | service_description = "Last postgresql dump in /backup2/eldiron/postgresql_backup is not too old"; | ||
26 | use = "local-service"; | ||
27 | check_command = ["check_last_file_date" "/backup2/eldiron/postgresql_backup" "7" "postgres"]; | ||
28 | } | ||
29 | ]; | ||
30 | } | ||
diff --git a/modules/private/monitoring/objects_common.nix b/modules/private/monitoring/objects_common.nix new file mode 100644 index 0000000..8466fdb --- /dev/null +++ b/modules/private/monitoring/objects_common.nix | |||
@@ -0,0 +1,179 @@ | |||
1 | { hostFQDN | ||
2 | , processWarn ? "250" | ||
3 | , processAlert ? "400" | ||
4 | , loadWarn ? "8.0" | ||
5 | , loadAlert ? "10.0" | ||
6 | , mdadm | ||
7 | , sudo | ||
8 | , ... | ||
9 | }: | ||
10 | { | ||
11 | host = { | ||
12 | "${hostFQDN}" = { | ||
13 | alias = hostFQDN; | ||
14 | address = hostFQDN; | ||
15 | use = "linux-server"; | ||
16 | }; | ||
17 | }; | ||
18 | service = [ | ||
19 | { | ||
20 | service_description = "Size on root partition"; | ||
21 | use = "local-service"; | ||
22 | check_command = ["check_local_disk" "20%" "10%" "/"]; | ||
23 | } | ||
24 | { | ||
25 | service_description = "Total number of process"; | ||
26 | use = "local-service"; | ||
27 | check_command = [ | ||
28 | "check_local_procs" | ||
29 | processWarn | ||
30 | processAlert | ||
31 | "RSZDT" | ||
32 | ]; | ||
33 | } | ||
34 | { | ||
35 | service_description = "Average load"; | ||
36 | use = "local-service"; | ||
37 | check_command = [ | ||
38 | "check_local_load" | ||
39 | "${loadWarn},${loadWarn},${loadWarn}" | ||
40 | "${loadAlert},${loadAlert},${loadAlert}" | ||
41 | ]; | ||
42 | } | ||
43 | { | ||
44 | service_description = "Swap usage"; | ||
45 | use = "local-service"; | ||
46 | check_command = ["check_local_swap" "20" "10"]; | ||
47 | } | ||
48 | { | ||
49 | service_description = "Memory usage"; | ||
50 | use = "local-service"; | ||
51 | check_command = ["check_memory" "80" "90"]; | ||
52 | } | ||
53 | { | ||
54 | service_description = "NTP is activated and working"; | ||
55 | use = "local-service"; | ||
56 | check_command = ["check_ntp"]; | ||
57 | } | ||
58 | { | ||
59 | service_description = "No mdadm array is degraded"; | ||
60 | use = "local-service"; | ||
61 | check_command = [ | ||
62 | "check_command_output" | ||
63 | "${mdadm}/bin/mdadm --monitor --scan -1" | ||
64 | "^$" | ||
65 | "-s 0 -r root" | ||
66 | ]; | ||
67 | } | ||
68 | ]; | ||
69 | command = { | ||
70 | check_local_disk = "$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$"; | ||
71 | check_local_procs = "$USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$"; | ||
72 | check_local_load = "$USER1$/check_load -w $ARG1$ -c $ARG2$"; | ||
73 | check_local_swap = "$USER1$/check_swap -n ok -w $ARG1$ -c $ARG2$"; | ||
74 | check_memory = "$USER2$/check_mem.sh -w $ARG1$ -c $ARG2$"; | ||
75 | check_command_output = "$USER2$/check_command -c \"$ARG1$\" -s 0 -o \"$ARG2$\" $ARG3$"; | ||
76 | check_ntp = "$USER1$/check_ntp_time -t 30 -q -H 0.arch.pool.ntp.org"; | ||
77 | check_postgresql_replication = "${sudo} -u postgres $USER2$/check_postgres_replication \"$ARG1$\" \"$ARG2$\" \"$ARG3$\""; | ||
78 | check_mailq = "$USER1$/check_mailq -s -w 1 -c 2"; | ||
79 | |||
80 | check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5"; | ||
81 | check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\""; | ||
82 | |||
83 | # No notify commands, we go through master | ||
84 | # notify_host_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" HOSTSTATE=\"$HOSTSTATE$\" HOSTOUTPUT=\"$HOSTOUTPUT$\" $USER2$/notify_by_email host \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; | ||
85 | # #$OVE is to force naemon to run via shell instead of execve which fails here | ||
86 | # notify_service_by_email = "SERVICENOTIFICATIONID=\"$SERVICENOTIFICATIONID$\" SERVICEDESC=\"$SERVICEDESC$\" SERVICESTATE=\"$SERVICESTATE$\" SERVICEOUTPUT=\"$SERVICEOUTPUT$\" $USER2$/notify_by_email service \"$NOTIFICATIONTYPE$\" \"$HOSTALIAS$\" \"$LONGDATETIME$\" \"$CONTACTEMAIL$\" $OVE"; | ||
87 | # #sudo /usr/bin/strace -o /tmp/foo -vf -s 256 -u naemon $USER2$/notify_by_email | ||
88 | # #$OVE is to force naemon to run via shell instead of execve which fails here | ||
89 | |||
90 | notify-master = "$USER2$/send_nrdp.sh -u \"$USER200$\" -t \"$USER201$\" -H \"$HOSTADDRESS$\" -s \"$SERVICEDESC$\" -S \"$SERVICESTATEID$\" -o \"$SERVICEOUTPUT$\""; | ||
91 | }; | ||
92 | timeperiod = { | ||
93 | "24x7" = { | ||
94 | alias = "24 Hours A Day, 7 Days A Week"; | ||
95 | monday = "00:00-24:00"; | ||
96 | tuesday = "00:00-24:00"; | ||
97 | wednesday = "00:00-24:00"; | ||
98 | thursday = "00:00-24:00"; | ||
99 | friday = "00:00-24:00"; | ||
100 | saturday = "00:00-24:00"; | ||
101 | sunday = "00:00-24:00"; | ||
102 | }; | ||
103 | }; | ||
104 | contactgroup = { | ||
105 | admins = { alias = "Naemon Administrators"; }; | ||
106 | }; | ||
107 | # No contact, we go through master | ||
108 | # contact = { | ||
109 | # immae = { | ||
110 | # alias = "Immae"; | ||
111 | # use = "generic-contact"; | ||
112 | # email = "xxxxxxxxxxxxxxxx"; | ||
113 | # }; | ||
114 | # }; | ||
115 | templates = { | ||
116 | service = { | ||
117 | generic-service = { | ||
118 | active_checks_enabled = "1"; | ||
119 | check_freshness = "0"; | ||
120 | check_interval = "10"; | ||
121 | check_period = "24x7"; | ||
122 | contact_groups = "admins"; | ||
123 | event_handler_enabled = "1"; | ||
124 | flap_detection_enabled = "1"; | ||
125 | is_volatile = "0"; | ||
126 | max_check_attempts = "3"; | ||
127 | notification_interval = "60"; | ||
128 | notification_options = "w,u,c,r,f,s"; | ||
129 | notification_period = "24x7"; | ||
130 | notifications_enabled = "0"; # no notification since we send them to master | ||
131 | obsess_over_service = "1"; | ||
132 | passive_checks_enabled = "1"; | ||
133 | process_perf_data = "1"; | ||
134 | retain_nonstatus_information = "1"; | ||
135 | retain_status_information = "1"; | ||
136 | retry_interval = "2"; | ||
137 | }; | ||
138 | local-service = { | ||
139 | use = "generic-service"; | ||
140 | host_name = hostFQDN; | ||
141 | check_interval = "5"; | ||
142 | max_check_attempts = "4"; | ||
143 | retry_interval = "1"; | ||
144 | }; | ||
145 | }; | ||
146 | # No contact, we go through master | ||
147 | # contact = { | ||
148 | # generic-contact = { | ||
149 | # host_notification_commands = "notify_host_by_email"; | ||
150 | # host_notification_options = "d,u,r,f,s"; | ||
151 | # host_notification_period = "24x7"; | ||
152 | # service_notification_commands = "notify_service_by_email"; | ||
153 | # service_notification_options = "w,u,c,r,f,s"; | ||
154 | # service_notification_period = "24x7"; | ||
155 | # }; | ||
156 | # }; | ||
157 | host = { | ||
158 | generic-host = { | ||
159 | event_handler_enabled = "1"; | ||
160 | flap_detection_enabled = "1"; | ||
161 | notification_period = "24x7"; | ||
162 | notifications_enabled = "1"; | ||
163 | process_perf_data = "1"; | ||
164 | retain_nonstatus_information = "1"; | ||
165 | retain_status_information = "1"; | ||
166 | }; | ||
167 | linux-server = { | ||
168 | check_command = "check_host_alive"; | ||
169 | check_interval = "5"; | ||
170 | check_period = "24x7"; | ||
171 | contact_groups = "admins"; | ||
172 | max_check_attempts = "10"; | ||
173 | notification_interval = "120"; | ||
174 | notification_options = "d,u,r,f"; | ||
175 | retry_interval = "1"; | ||
176 | }; | ||
177 | }; | ||
178 | }; | ||
179 | } | ||
diff --git a/modules/private/monitoring/objects_eldiron.nix b/modules/private/monitoring/objects_eldiron.nix new file mode 100644 index 0000000..897fc15 --- /dev/null +++ b/modules/private/monitoring/objects_eldiron.nix | |||
@@ -0,0 +1,15 @@ | |||
1 | { ... }: | ||
2 | { | ||
3 | service = [ | ||
4 | { | ||
5 | service_description = "Postgresql replication for backup-2 is up to date"; | ||
6 | use = "local-service"; | ||
7 | check_command = ["check_postgresql_replication" "backup-2" "/run/postgresql" "5432"]; | ||
8 | } | ||
9 | { | ||
10 | service_description = "mailq is empty"; | ||
11 | use = "local-service"; | ||
12 | check_command = ["check_mailq"]; | ||
13 | } | ||
14 | ]; | ||
15 | } | ||
diff --git a/modules/private/monitoring/to_objects.nix b/modules/private/monitoring/to_objects.nix new file mode 100644 index 0000000..5ad76e0 --- /dev/null +++ b/modules/private/monitoring/to_objects.nix | |||
@@ -0,0 +1,67 @@ | |||
1 | { lib }: | ||
2 | with lib.attrsets; | ||
3 | with lib.strings; | ||
4 | with lib.lists; | ||
5 | with lib.trivial; | ||
6 | let | ||
7 | pad = width: str: let | ||
8 | padWidth = width - stringLength str; | ||
9 | padding = concatStrings (genList (const " ") padWidth); | ||
10 | in str + optionalString (padWidth > 0) padding; | ||
11 | toStr = k: v: | ||
12 | if k == "check_command" && builtins.isList v | ||
13 | then builtins.concatStringsSep "!" v | ||
14 | else v; | ||
15 | |||
16 | toService = service: '' | ||
17 | define service { | ||
18 | ${builtins.concatStringsSep "\n" (mapAttrsToList (k: v: | ||
19 | " ${pad 30 k} ${toStr k v}" | ||
20 | ) service)} | ||
21 | } | ||
22 | ''; | ||
23 | toServices = services: builtins.concatStringsSep "\n" (map toService services); | ||
24 | |||
25 | toCommand = k: v: '' | ||
26 | define command { | ||
27 | ${pad 30 "command_name"} ${k} | ||
28 | ${pad 30 "command_line"} ${v} | ||
29 | } | ||
30 | ''; | ||
31 | toCommands = a: builtins.concatStringsSep "\n" (mapAttrsToList toCommand a); | ||
32 | |||
33 | toOther = keyname: k: v: '' | ||
34 | define ${keyname} { | ||
35 | ${pad 30 "${keyname}_name"} ${k} | ||
36 | ${builtins.concatStringsSep "\n" (mapAttrsToList (kk: vv: | ||
37 | " ${pad 30 kk} ${vv}" | ||
38 | ) v)} | ||
39 | } | ||
40 | ''; | ||
41 | toOthers = keyname: a: builtins.concatStringsSep "\n" (mapAttrsToList (toOther keyname) a); | ||
42 | |||
43 | toTemplate = keyname: k: v: '' | ||
44 | define ${keyname} { | ||
45 | ${pad 30 "name"} ${k} | ||
46 | ${pad 30 "register"} 0 | ||
47 | ${builtins.concatStringsSep "\n" (mapAttrsToList (kk: vv: | ||
48 | " ${pad 30 kk} ${vv}" | ||
49 | ) v)} | ||
50 | } | ||
51 | ''; | ||
52 | toTemplates' = keyname: a: builtins.concatStringsSep "\n" (mapAttrsToList (toTemplate keyname) a); | ||
53 | toTemplates = v: builtins.concatStringsSep "\n" (mapAttrsToList toTemplates' v); | ||
54 | |||
55 | toObjects' = keyname: v: | ||
56 | if keyname == "service" | ||
57 | then toServices v | ||
58 | else if keyname == "command" | ||
59 | then toCommands v | ||
60 | else if keyname == "templates" | ||
61 | then toTemplates v | ||
62 | else if builtins.elem keyname ["host" "contactgroup" "contact" "timeperiod"] | ||
63 | then toOthers keyname v | ||
64 | else ""; | ||
65 | toObjects = v: builtins.concatStringsSep "\n" (mapAttrsToList toObjects' v); | ||
66 | in | ||
67 | toObjects | ||