aboutsummaryrefslogtreecommitdiff
path: root/systems/dilion/monitoring.nix
blob: 5c4e5b0853d67c5570120f23e70e37403ae6bee8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Monitoring configuration module for this host.
#
# Module arguments used below:
#   - `name`       is passed on as the monitoring `hostName`;
#   - `monitoring` supplies `monitoring.lib.objectsCommon`;
#   - `config`     is read for `hostEnv.fqdn` and `networking.interfaces`.
{ lib, pkgs, config, name, monitoring, ... }:
let
  # Fully-qualified domain name of this host, taken from the host environment
  # configuration and handed to `objectsCommon`.
  hostFQDN = config.hostEnv.fqdn;
  # Build the monitoring service entry that runs `check_zfs_snapshot` for one
  # snapshot target. The argument is used both in the human-readable
  # description and as the sole argument of the check command.
  # (The parameter is called `dataset` rather than `name` so it does not
  # shadow the module-level `name` argument.)
  zfs_snapshot = dataset: {
    use = "local-service";
    __passive_servicegroups = "webstatus-resources";
    check_command = [ "check_zfs_snapshot" dataset ];
    service_description = "ZFS snapshot ${dataset} happened not too long ago";
  };
in
{
  # Monitoring plugins enabled on this host.
  config.myServices.monitoring.activatedPlugins = [ "memory" "command" "bandwidth" "mdadm" "zfs" "notify-secondary" "smartctl" ];

  # Monitoring objects for this host: the common per-host objects merged with
  # the host-specific storage checks (SMART, mdadm, ZFS pools and snapshots).
  config.myServices.monitoring.objects =
    let
      # Build the SMART health-check service entry for a single disk.
      # `device` is the disk's device path; it appears in the description and
      # is passed as the argument of `check_smartctl`.
      smartctl_disk = device: {
        service_description = "Disk ${device} is sane";
        use = "local-service";
        check_command = [ "check_smartctl" device ];
        __passive_servicegroups = "webstatus-resources";

        check_interval = 60;
      };

      # Disks checked with smartctl, in the order their services are emitted.
      smartctl_disks = [
        "/dev/disk/by-id/ata-Micron_1100_MTFDDAK512TBN_171216BD2A4B"
        "/dev/disk/by-id/ata-Micron_1100_MTFDDAK512TBN_171216BD2A70"
        "/dev/disk/by-id/ata-TOSHIBA_MG04ACA400N_96K1K87YFVLC"
        "/dev/disk/by-id/ata-WDC_WD3000FYYZ-01UL1B2_WD-WMC1F0E4X6WP"
      ];
    in
    lib.mkMerge [
      (monitoring.lib.objectsCommon {
        inherit hostFQDN;
        hostName = name;
        master = false;
        processWarn = "250"; processAlert = "400";
        loadWarn = "1.0"; loadAlert = "1.2";
        # `builtins.attrNames` returns names sorted alphabetically, so this
        # picks the alphabetically-first configured interface.
        # NOTE(review): assumes that interface is the one to monitor — confirm
        # if more interfaces are ever declared on this host.
        interface = builtins.head (builtins.attrNames config.networking.interfaces);
      })

      {
        # One SMART service per disk, followed by the array/pool/snapshot
        # checks; same entries and order as when they were written out by hand.
        service = (map smartctl_disk smartctl_disks) ++ [
          {
            service_description = "No mdadm array is degraded";
            use = "local-service";
            check_command = ["check_mdadm"];
            __passive_servicegroups = "webstatus-resources";
          }
          {
            service_description = "No ZFS pool is degraded";
            use = "local-service";
            check_command = ["check_zfs"];
            __passive_servicegroups = "webstatus-resources";
          }
          (zfs_snapshot "zpool/backup/eldiron/zpool/root")
          (zfs_snapshot "zpool/backup/eldiron/zpool/root/etc")
          (zfs_snapshot "zpool/backup/eldiron/zpool/root/var")
        ];
      }
    ];
}