]> git.immae.eu Git - perso/Immae/Config/Nix.git/blame - scripts/anonymize
Add kanboard for gebull
[perso/Immae/Config/Nix.git] / scripts / anonymize
CommitLineData
df30e537
IB
1#!/usr/bin/env python3
2
3import sys
4import argparse
5import os
6import json
7import re
8
# Command-line interface: one positional words file plus two boolean flags.
parser = argparse.ArgumentParser()
parser.add_argument("words_file", help="File that contains the words to (de)anonymize")
parser.add_argument("--ignore-missing", "-i", action="store_true", help="treat missing file as empty list")
parser.add_argument("--deanonymize", "-d", action="store_true", help="deanonymize")
config = parser.parse_args()

# Only these letters are shifted; any other character is passed through as-is.
alphabet = "abcdefghijklmnopqrstuvwxyz"

# The key is read from the environment and compared case-insensitively.
# Validation is an explicit check rather than an `assert`, so that a key
# containing non-letters produces the same friendly error as a missing key
# and the check is not stripped when Python runs with -O.
key = os.environ.get("ANONYMIZE_KEY")
if key is None or not all(k in alphabet for k in key.lower()):
    print("Please set ANONYMIZE_KEY as environment variable with only letters", file=sys.stderr)
    sys.exit(1)
key = key.lower()

if not os.path.isfile(config.words_file):
    if config.ignore_missing:
        # No words file means nothing to replace: copy stdin to stdout unchanged.
        print(sys.stdin.read(), end="")
        sys.exit(0)
    print("Could not find words file", file=sys.stderr)
    sys.exit(1)

# Close the file deterministically; JSON text is decoded as UTF-8 instead of
# depending on the current locale.
with open(config.words_file, encoding="utf-8") as words_fd:
    words = json.load(words_fd)

# Each letter of a word is shifted by the key letter at the same position,
# so the key must cover the longest word.
if any(len(word) > len(key) for word in words):
    print("The key needs to be at least as long as the longest word in the list (append to existing one to keep already mangled words)", file=sys.stderr)
    sys.exit(1)

# Direction of the shift: +1 to anonymize, -1 to undo it.
order = -1 if config.deanonymize else 1
39
def replace(match):
    """Return the matched text with each ASCII letter shifted by the key.

    The letter at position ``i`` of the match is moved through ``alphabet``
    by ``order`` times the alphabet index of ``key[i]``, wrapping around at
    the ends. Characters whose lowercase form is not in ``alphabet`` are
    copied unchanged (they still consume a key position). Uppercase letters
    stay uppercase.

    Reads the module-level ``alphabet``, ``key`` and ``order``.

    Args:
        match: a regex match object; only ``match.group()`` is used.

    Returns:
        The transformed string, same length as the matched text.

    Raises:
        IndexError: if the matched text contains a letter at a position
            beyond the end of ``key``.
    """
    shifted = []
    for position, char in enumerate(match.group()):
        lowered = char.lower()
        if lowered not in alphabet:
            shifted.append(char)
            continue
        key_index = alphabet.index(key[position])
        letter_index = alphabet.index(lowered)
        new_letter = alphabet[(letter_index + order * key_index) % len(alphabet)]
        # Restore the original case of the input letter.
        if lowered != char:
            new_letter = new_letter.upper()
        shifted.append(new_letter)
    return ''.join(shifted)
54
# One alternation matching any listed word between word boundaries. Each word
# is escaped so that regex metacharacters inside it (e.g. "." or "+") are
# matched literally instead of being interpreted as pattern syntax. No capture
# groups are needed because `replace` only looks at the whole match.
regexp = re.compile('|'.join(r'\b' + re.escape(w) + r'\b' for w in words))
# Filter stdin to stdout, rewriting every matched word through `replace`.
print(regexp.sub(replace, sys.stdin.read()), end="")