From df30e537bd20ac575b1f644425c161a89d737479 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Isma=C3=ABl=20Bouya?= Date: Mon, 15 Nov 2021 22:42:07 +0100 Subject: [PATCH] Anonymize names in files --- .envrc | 1 + .gitattributes | 1 + .gitconfig | 12 ++++++++++ scripts/anonymize | 56 +++++++++++++++++++++++++++++++++++++++++++++++ shell.nix | 2 +- words.json | 6 +++++ 6 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 .gitattributes create mode 100755 scripts/anonymize create mode 100644 words.json diff --git a/.envrc b/.envrc index 438d807..16c9dde 100644 --- a/.envrc +++ b/.envrc @@ -3,6 +3,7 @@ export NIX_PATH=nixpkgs=$(cat $(expand_path nix/sources.json) | jq -r '."nixpkgs NIX_PATH=$NIX_PATH:nixpkgs-nix=$(cat $(expand_path nix/sources.json) | jq -r '."nixpkgs-nix".url') export NIXOPS_ENV_LOADED=1 +export ANONYMIZE_KEY="dedhogryajkegthlwribFecnocItTelilAwdod" PATH_add $(expand_path scripts) PATH_add $(expand_path nixops/scripts) diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..6fcc68e --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +#*.nix filter=anonymize diff --git a/.gitconfig b/.gitconfig index 7aa8870..fe165e2 100644 --- a/.gitconfig +++ b/.gitconfig @@ -7,3 +7,15 @@ textconv = "gpg --quiet -d" [diff "sopsdiffer"] textconv = "sops -d" +[filter "anonymize"] + clean = "./scripts/anonymize -i words.json" + smudge = "./scripts/anonymize -i -d words.json" + required = true +[submodule "nixops/secrets"] + url = gitolite@git.immae.eu:perso/Immae/Config/Nix/Nixops/Secrets + active = true +[remote "origin-stgit"] + url = gitolite@git.immae.eu:perso/Immae/Config/Nix.stgit + push = +refs/stacks/*:refs/stacks/* + push = +refs/patches/*:refs/patches/* + push = +refs/original/*:refs/original/* diff --git a/scripts/anonymize b/scripts/anonymize new file mode 100755 index 0000000..e93e1ed --- /dev/null +++ b/scripts/anonymize @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 + +import sys +import argparse +import os +import json +import re + 
+parser = argparse.ArgumentParser()
+parser.add_argument("words_file", help="File that contains the words to (de)anonymize")
+parser.add_argument("--ignore-missing", "-i", action="store_true", help="treat missing file as empty list")
+parser.add_argument("--deanonymize", "-d", action="store_true", help="deanonymize")
+config = parser.parse_args()
+
+alphabet = "abcdefghijklmnopqrstuvwxyz"
+# Checked explicitly: an assert is stripped under `python -O`, and a failing one is not a KeyError.
+key = os.environ.get("ANONYMIZE_KEY")
+if key is None or any(k not in alphabet for k in key.lower()):
+    print("Please set ANONYMIZE_KEY as environment variable with only letters", file=sys.stderr)
+    sys.exit(1)
+key = key.lower()
+
+if not os.path.isfile(config.words_file):
+    if not config.ignore_missing:
+        print("Could not find words file", file=sys.stderr)
+        sys.exit(1)
+    print(sys.stdin.read(), end="")  # no word list: copy the input through unchanged
+    sys.exit(0)
+
+with open(config.words_file) as words_fd:  # context manager so the file handle is closed
+    words = json.load(words_fd)
+if any(len(word) > len(key) for word in words):
+    print("The key needs to be at least as long as the longest word in the list (append to existing one to keep already mangled words)", file=sys.stderr)
+    sys.exit(1)
+
+order = -1 if config.deanonymize else 1  # direction of the alphabet shift
+
+def replace(match):
+    """Return the matched name with each letter shifted by the key letter at the same index."""
+    name = match.group()
+    if len(name) > len(key):  # entries are regexes (e.g. \w*), so a match can be longer than its pattern
+        print("The key needs to be at least as long as the longest matched name", file=sys.stderr)
+        sys.exit(1)
+    result = []
+    for k, char in enumerate(name):
+        lower = char.lower()
+        if lower not in alphabet:
+            result.append(char)  # kept verbatim, but still occupies a key position
+            continue
+        new_letter = alphabet[(alphabet.index(lower) + order * alphabet.index(key[k])) % len(alphabet)]
+        result.append(new_letter.upper() if lower != char else new_letter)  # keep the original case
+    return ''.join(result)
+
+# NOTE(review): with -d the patterns are still the clear-text words, so already-mangled names will not match -- confirm.
+regexp = re.compile("(" + '|'.join([r'(\b' + w + r'\b)' for w in words]) + ")")
+print(regexp.sub(replace, sys.stdin.read()), end="")
diff --git a/shell.nix b/shell.nix index 2295f8c..3d27d03 100644 --- a/shell.nix +++ b/shell.nix @@ -14,5 +14,5 @@ let }); in pkgs.mkShell { - buildInputs = [ patchedNix pkgs.sops pkgs.morph pkgs.niv pkgs.curl pkgs.shellcheck pkgs.jq pkgs.gnumake pkgs.yq ]; + buildInputs = [ patchedNix pkgs.python3
pkgs.sops pkgs.morph pkgs.niv pkgs.curl pkgs.shellcheck pkgs.jq pkgs.gnumake pkgs.yq ]; } diff --git a/words.json b/words.json new file mode 100644 index 0000000..c91e1de --- /dev/null +++ b/words.json @@ -0,0 +1,6 @@ +[ + "christopheCarpentier\\w*", + "christophe_carpentier", + "Christophe", + "Carpentier" +] -- 2.41.0