]> git.immae.eu Git - perso/Immae/Config/Nix.git/blob - scripts/anonymize
Anonymize names in files
[perso/Immae/Config/Nix.git] / scripts / anonymize
1 #!/usr/bin/env python3
2
3 import sys
4 import argparse
5 import os
6 import json
7 import re
8
# Command-line interface: one positional words file and two optional flags.
parser = argparse.ArgumentParser()
parser.add_argument(
    "words_file",
    help="File that contains the words to (de)anonymize",
)
parser.add_argument(
    "--ignore-missing",
    "-i",
    action="store_true",
    help="treat missing file as empty list",
)
parser.add_argument(
    "--deanonymize",
    "-d",
    action="store_true",
    help="deanonymize",
)
config = parser.parse_args()
14
# Letters that can be shifted; any other character is passed through as is.
alphabet = "abcdefghijklmnopqrstuvwxyz"

# The key is read from the environment and lowercased, so its letters are
# case-insensitive.
key = os.environ.get("ANONYMIZE_KEY")
if key is not None:
    key = key.lower()

# Validate explicitly instead of with `assert`: assertions are removed under
# `python -O`, and a failing one would surface as an uncaught AssertionError
# traceback rather than the message below.
if key is None or any(letter not in alphabet for letter in key):
    print("Please set ANONYMIZE_KEY as environment variable with only letters", file=sys.stderr)
    sys.exit(1)
23
# Without a words file there is nothing to replace: with --ignore-missing the
# input is copied to stdout unchanged, otherwise this is an error.
if not os.path.isfile(config.words_file):
    if config.ignore_missing:
        print(sys.stdin.read(), end="")
        sys.exit(0)
    print("Could not find words file", file=sys.stderr)
    sys.exit(1)

# Read the JSON words list; the context manager closes the file handle
# instead of leaving it to the garbage collector.
with open(config.words_file) as words_handle:
    words = json.load(words_handle)

# Each character position of a word is shifted by the key letter at the same
# position, so the key must cover the longest word.
if any(len(word) > len(key) for word in words):
    print("The key needs to be at least as long as the longest word in the list (append to existing one to keep already mangled words)", file=sys.stderr)
    sys.exit(1)

# Shift direction: forwards to anonymize, backwards to deanonymize.
order = -1 if config.deanonymize else 1
39
def replace(match):
    """Return the matched text with each letter shifted by the key.

    The character at position ``k`` of the match is moved through the
    module-level ``alphabet`` by the alphabet index of ``key[k]``, multiplied
    by the module-level ``order`` (1 or -1), wrapping around at the ends.
    Upper case is preserved. Characters whose lowercase form is not in
    ``alphabet`` are copied unchanged, but still occupy a key position.

    Args:
        match: a regex match object; only ``match.group()`` is read.

    Returns:
        The transformed string.

    Raises:
        IndexError: if a letter sits at a position beyond the end of ``key``.
    """
    result = []
    for position, char in enumerate(match.group()):
        lowered = char.lower()
        if lowered not in alphabet:
            result.append(char)
            continue
        shift = order * alphabet.index(key[position])
        new_letter = alphabet[(alphabet.index(lowered) + shift) % len(alphabet)]
        # A character that differs from its lowercase form was upper case.
        if char != lowered:
            new_letter = new_letter.upper()
        result.append(new_letter)
    return "".join(result)
54
# Match any listed word as a whole word. Words are escaped so that regex
# metacharacters inside them (for instance a ".") are matched literally;
# unescaped, they could match more text than the word itself, possibly longer
# than the key.
# NOTE(review): the pattern is built from the words as listed in the file in
# both directions, so with --deanonymize only occurrences of those listed
# spellings are shifted back — confirm the words file is expected to cover
# the mangled forms.
regexp = re.compile(
    "(" + "|".join(r"(\b" + re.escape(w) + r"\b)" for w in words) + ")"
)
print(regexp.sub(replace, sys.stdin.read()), end="")