aboutsummaryrefslogtreecommitdiff
path: root/systems/eldiron/monitoring.nix
blob: 92da7cf1340a816766277a22df44c05e1c1c04a1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Monitoring module for this host: selects the monitoring plugins to activate
# and declares the host-specific service checks, merged with the common objects
# produced by `monitoring.lib.objectsCommon`.
{ config, pkgs, lib, name, monitoring, ... }:
let
  hostFQDN = config.hostEnv.fqdn;
  emailCheck = monitoring.lib.emailCheck config.myEnv.monitoring.email_check;

  # Disks that get a smartctl health check, in the order the services are
  # declared. Each path is used both in the service description and as the
  # argument of the check command, so it is listed only once here.
  smartctlDisks = [
    "/dev/disk/by-id/ata-ST33000650NS_Z29540DM"
    "/dev/disk/by-id/ata-ST33000650NS_Z296JGJ4"
    "/dev/disk/by-id/ata-SAMSUNG_MZ7LM480HMHQ-00005_S2UJNX0HA05895"
    "/dev/disk/by-id/ata-Micron_5200_MTFDDAK480TDC_18011BEE03B3"
  ];

  # Builds the smartctl service definition for one disk path.
  smartctlService = disk: {
    service_description = "Disk ${disk} is sane";
    use = "local-service";
    check_command = [ "check_smartctl" disk ];
    __passive_servicegroups = "webstatus-resources";

    check_interval = 60;
  };
in
{
  config.myServices.monitoring.activatedPlugins = [ "memory" "command" "bandwidth" "emails" "mdadm" "postfix" "postgresql" "zfs" "notify-secondary" "smartctl" ];
  config.myServices.monitoring.pluginsArgs.postgresql.package = config.myServices.databases.postgresql.package;
  config.myServices.monitoring.objects = lib.mkMerge [
    (monitoring.lib.objectsCommon {
      inherit hostFQDN;
      hostName = name;
      master = false;
      processWarn = "550"; processAlert = "650";
      loadWarn = "1.0"; loadAlert = "1.2";
      # Takes the first interface name returned by builtins.attrNames, i.e. the
      # alphabetically first key of config.networking.interfaces.
      interface = builtins.head (builtins.attrNames config.networking.interfaces);
    })

    {
      service = (builtins.map smartctlService smartctlDisks) ++ [
        {
          service_description = "No mdadm array is degraded";
          use = "local-service";
          check_command = ["check_mdadm"];
          __passive_servicegroups = "webstatus-resources";
        }
        {
          service_description = "Postgresql replication for backup-2 is up to date";
          use = "local-service";
          check_command = ["check_postgresql_replication" "backup-2" "/run/postgresql" "5432"];
          __passive_servicegroups = "webstatus-databases";
        }
        {
          service_description = "No ZFS pool is degraded";
          use = "local-service";
          check_command = ["check_zfs"];
          __passive_servicegroups = "webstatus-resources";
        }
        {
          service_description = "mailq is empty";
          use = "local-service";
          check_command = ["check_mailq"];
          __passive_servicegroups = "webstatus-email";
        }
        # The attribute set returned by emailCheck is extended (or overridden)
        # with the passive service group via `//`.
        (emailCheck "eldiron" hostFQDN // {
          __passive_servicegroups = "webstatus-email";
        })
      ];
    }
  ];
}