git.immae.eu Git - perso/Immae/Config/Nix.git/commitdiff
Add monitoring for dilion and quatresaisons
author Ismaël Bouya <ismael.bouya@normalesup.org>
Sun, 27 Dec 2020 14:36:09 +0000 (15:36 +0100)
committer Ismaël Bouya <ismael.bouya@normalesup.org>
Sun, 27 Dec 2020 14:36:09 +0000 (15:36 +0100)
modules/private/monitoring/default.nix
modules/private/monitoring/objects_common.nix
modules/private/monitoring/objects_dilion.nix [new file with mode: 0644]
modules/private/monitoring/objects_eldiron.nix
modules/private/monitoring/objects_monitoring-1.nix
modules/private/monitoring/objects_quatresaisons.nix [new file with mode: 0644]
modules/private/system/dilion.nix
modules/private/system/quatresaisons.nix

index 344e3691ddd3a7344b9336315aac33bdcc991f68..a298f9246965aecd54b085193b950db6ee826cc3 100644 (file)
@@ -16,13 +16,21 @@ let
     else if config.myServices.databases.enable
     then config.myServices.databases.postgresql.package
     else pkgs.postgresql;
+  zfsPlugin = pkgs.fetchurl {
+    url = "https://www.claudiokuenzler.com/monitoring-plugins/check_zpools.sh";
+    sha256 = "0p9ms9340in80jkds4kfspw62xnzsv5s7ni9m28kxyd0bnzkbzhf";
+  };
   myplugins = pkgs.runCommand "buildplugins" {
     buildInputs = [ pkgs.makeWrapper pkgs.perl ];
   } ''
     mkdir $out
+    cp ${zfsPlugin} $out/check_zpool.sh && chmod +x $out/check_zpool.sh
     cp ${./plugins}/* $out/
     patchShebangs $out
     wrapProgram $out/check_command --prefix PATH : ${config.security.wrapperDir}
+    wrapProgram $out/check_zpool.sh --prefix PATH : ${lib.makeBinPath [
+      pkgs.which pkgs.zfs pkgs.gawk
+    ]}
     wrapProgram $out/send_nrdp.sh --prefix PATH : ${lib.makeBinPath [
       pkgs.curl pkgs.jq
     ]}
@@ -79,6 +87,11 @@ let
     '';
   toObjects = pkgs.callPackage ./to_objects.nix {};
   commonConfig = {
+    dilion = {
+      processWarn = "250"; processAlert = "400";
+      loadWarn = "8.0"; loadAlert = "10.0";
+      interface = "eth0";
+    };
     eldiron = {
       processWarn = "250"; processAlert = "400";
       loadWarn = "8.0"; loadAlert = "10.0";
@@ -95,11 +108,16 @@ let
       load15Warn = "1.0"; load15Alert = "2.0";
       interface = "ens3";
     };
+    quatresaisons = {
+      processWarn = "250"; processAlert = "400";
+      loadWarn = "8.0"; loadAlert = "10.0";
+      interface = "eth0";
+    };
   };
   externalObjects = lib.genAttrs [ "tiboqorl-fr" ]
     (n: pkgs.callPackage (./. + "/objects_" + n + ".nix") { inherit emailCheck; });
   masterPassiveObjects = let
-    passiveNodes = lib.attrsets.filterAttrs (n: _: builtins.elem n ["backup-2" "eldiron"]) nodes;
+    passiveNodes = lib.attrsets.filterAttrs (n: _: builtins.elem n ["backup-2" "eldiron" "quatresaisons" "dilion"]) nodes;
     toPassiveServices = map (s: s.passiveInfo.filter s // s.passiveInfo);
     passiveServices = lib.flatten (lib.attrsets.mapAttrsToList
       (_: n: toPassiveServices n.config.myServices.monitoring.services)
index 82043ebdd0b58d7944cb875f9616b3d5c4db6e64..7f553a0044b734caf3975036133ddbf6e4f68604 100644 (file)
@@ -143,6 +143,7 @@ in
     check_smtp = "$USER1$/check_smtp -H $HOSTADDRESS$ -p 25 -S -D 21,15";
     check_tcp = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -e \"$ARG2$\" -Mcrit";
     check_tcp_ssl = "$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -S -D 21,15";
+    check_zfs = "$USER2$/check_zpool.sh -p ALL";
 
     check_host_alive = "$USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5";
     check_last_file_date = "${sudo} -u \"$ARG3$\" $USER2$/check_last_file_date \"$ARG1$\" \"$ARG2$\"";
diff --git a/modules/private/monitoring/objects_dilion.nix b/modules/private/monitoring/objects_dilion.nix
new file mode 100644 (file)
index 0000000..ea4ec37
--- /dev/null
@@ -0,0 +1,22 @@
+{ lib, hostFQDN, emailCheck, ... }:
+let
+  defaultPassiveInfo = {
+    filter = lib.attrsets.filterAttrs
+      (k: v: builtins.elem k ["service_description"] || builtins.substring 0 1 k == "_");
+    use = "external-passive-service";
+    freshness_threshold = "450";
+    retry_interval = "1";
+    servicegroups = "webstatus-resources";
+    host_name = hostFQDN;
+  };
+in
+{
+  service = [
+    {
+      passiveInfo = defaultPassiveInfo // { servicegroups = "webstatus-resources"; };
+      service_description = "No ZFS pool is degraded";
+      use = "local-service";
+      check_command = ["check_zfs"];
+    }
+  ];
+}
index 92f997f06f0931aa07cf836e0d65ff9bb773d208..2c15dd6d072326061c01e694c322dc6d99aafb7a 100644 (file)
@@ -18,6 +18,12 @@ in
       use = "local-service";
       check_command = ["check_postgresql_replication" "backup-2" "/run/postgresql" "5432"];
     }
+    {
+      passiveInfo = defaultPassiveInfo // { servicegroups = "webstatus-resources"; };
+      service_description = "No ZFS pool is degraded";
+      use = "local-service";
+      check_command = ["check_zfs"];
+    }
     {
       passiveInfo = defaultPassiveInfo // { servicegroups = "webstatus-email"; };
       service_description = "mailq is empty";
index 94b676bd701cc09bb5517060446c5b8be2898204..4b784eddbbf984689e7b0272ca90aa6354d2b602 100644 (file)
@@ -113,17 +113,17 @@ in
     }
 
     # Backup services
-    {
-      service_description = "eriomem backup is up and not full";
-      host_name = "eldiron.immae.eu";
-      use = "external-service";
-      check_command = "check_backup_eriomem";
+    {
+      service_description = "eriomem backup is up and not full";
+      host_name = "eldiron.immae.eu";
+      use = "external-service";
+      check_command = "check_backup_eriomem";
 
-      check_interval = "120";
-      notification_interval = "1440";
+      check_interval = "120";
+      notification_interval = "1440";
 
-      servicegroups = "webstatus-backup";
-    }
+      servicegroups = "webstatus-backup";
+    }
     {
       service_description = "ovh backup is up and not full";
       host_name = "eldiron.immae.eu";
diff --git a/modules/private/monitoring/objects_quatresaisons.nix b/modules/private/monitoring/objects_quatresaisons.nix
new file mode 100644 (file)
index 0000000..3927ef5
--- /dev/null
@@ -0,0 +1,16 @@
+{ lib, hostFQDN, emailCheck, ... }:
+let
+  defaultPassiveInfo = {
+    filter = lib.attrsets.filterAttrs
+      (k: v: builtins.elem k ["service_description"] || builtins.substring 0 1 k == "_");
+    use = "external-passive-service";
+    freshness_threshold = "450";
+    retry_interval = "1";
+    servicegroups = "webstatus-resources";
+    host_name = hostFQDN;
+  };
+in
+{
+  service = [
+  ];
+}
index 4e2a11f44d1e5b5824ba613665b8269790443665..b89b3649878b71e3592b41b5f25e6d00b99f4c75 100644 (file)
@@ -24,6 +24,8 @@
       "/boot/pass.key" = "/boot/pass.key";
     };
     kernel.sysctl."vm.nr_hugepages" = 256; # for xmr-stak
+    # available in nixos-20.09
+    #zfs.requestEncryptionCredentials = [ "zpool/root" ];
   };
   nix.maxJobs = 8;
   powerManagement.cpuFreqGovernor = "powersave";
   nix.binaryCaches = [ "https://hydra.iohk.io" "https://cache.nixos.org" ];
   nix.binaryCachePublicKeys = [ "hydra.iohk.io:f/Ea+s+dFdN+3Y/G+FDgSq+a5NEWhJGzdjvKNGv0/EQ=" ];
 
+  myServices.monitoring.enable = true;
   myServices.certificates.enable = true;
   security.acme.certs."${name}" = {
     user = config.services.nginx.user;
index 353323f1696d76ab06da0b92bb11528e5ed46c2e..e9054609e2cac192c3d210e80a43b0e74bf3fac5 100644 (file)
@@ -274,6 +274,7 @@ in
     }
   ];
 
+  myServices.monitoring.enable = true;
   myServices.certificates.enable = true;
   users.mutableUsers = true;
   system.stateVersion = "21.03";