# git.immae.eu Git - perso/Immae/Config/Nix.git/blob - scripts/anonymize
# Command-line interface: one positional words file plus two boolean switches.
parser = argparse.ArgumentParser()
parser.add_argument("words_file", help="File that contains the words to (de)anonymize")
parser.add_argument("--ignore-missing", "-i", action="store_true", help="treat missing file as empty list")
parser.add_argument("--deanonymize", "-d", action="store_true", help="deanonymize")
# Parsed options are read below as config.words_file, config.ignore_missing
# and config.deanonymize.
config = parser.parse_args()
# Letters the substitution operates on; any character whose lowercase form is
# not in this string is copied through unchanged.
alphabet = "abcdefghijklmnopqrstuvwxyz"
# The substitution key comes from the ANONYMIZE_KEY environment variable and is
# lowercased so that it can be looked up in `alphabet`.
key = os.environ.get("ANONYMIZE_KEY")
if key is not None:
    key = key.lower()

# Validate explicitly instead of with `assert`, which is stripped when Python
# runs with -O and would let an invalid key through.
if key is None or any(k not in alphabet for k in key):
    print("Please set ANONYMIZE_KEY as environment variable with only letters", file=sys.stderr)
    # NOTE(review): the statement following this message is missing from the
    # extracted source; exiting with status 1 is reconstructed - confirm.
    sys.exit(1)
# Handle a missing words file before trying to load it.
if not os.path.isfile(config.words_file):
    if config.ignore_missing:
        # --ignore-missing treats the missing file as an empty list, so there
        # is nothing to substitute: copy stdin to stdout unchanged.
        print(sys.stdin.read(), end="")
        # NOTE(review): exit call reconstructed (line missing from the
        # extracted source) - confirm the status code.
        sys.exit(0)
    print("Could not find words file", file=sys.stderr)
    # NOTE(review): exit call reconstructed (line missing from the extracted
    # source) - confirm the status code.
    sys.exit(1)
# Load the JSON list of words to (de)anonymize. The context manager closes the
# file handle, which a bare json.load(open(...)) leaves to the garbage collector.
with open(config.words_file) as words_handle:
    words = json.load(words_handle)
# Each letter of a word is shifted by the key letter at the same position, so
# the key must cover the longest word.
if any(len(word) > len(key) for word in words):
    print("The key needs to be at least as long as the longest word in the list (append to existing one to keep already mangled words)", file=sys.stderr)
    # NOTE(review): exit call reconstructed (line missing from the extracted
    # source) - confirm the status code.
    sys.exit(1)
# Direction of the shift: +1 adds the key offset (anonymize), -1 subtracts it
# again (--deanonymize).
order = -1 if config.deanonymize else 1
def replace(match):
    """Return the matched word with every alphabet letter shifted by the key.

    Used as the replacement callback of ``regexp.sub``. The letter at
    position ``i`` is moved ``order * alphabet.index(key[i])`` places through
    ``alphabet`` (wrapping around); characters whose lowercase form is not in
    ``alphabet`` are kept unchanged, and uppercase letters stay uppercase.

    NOTE(review): the ``def`` line and the initialisation of ``name`` and
    ``result`` are missing from the extracted source; they are reconstructed
    here from the ``regexp.sub(replace, ...)`` call and from how the loop uses
    them - confirm ``match.group(0)`` is the intended text.
    """
    name = match.group(0)
    result = []
    for position, char in enumerate(name):
        lowered = char.lower()
        if lowered not in alphabet:
            # Not a letter of the alphabet: copy through untouched.
            result.append(char)
            continue
        key_index = alphabet.index(key[position])
        letter_index = alphabet.index(lowered)
        new_letter = alphabet[(letter_index + order * key_index) % len(alphabet)]
        if lowered != char:
            # The source character was uppercase; keep its case.
            new_letter = new_letter.upper()
        result.append(new_letter)
    return ''.join(result)
# One alternation of all words, each anchored on word boundaries so that only
# whole words match. Words are escaped so that regex metacharacters in the
# list (".", "+", "(", ...) are matched literally instead of being interpreted
# as pattern syntax.
regexp = re.compile(
    "(" + '|'.join(r'(\b' + re.escape(w) + r'\b)' for w in words) + ")"
)
# Read all of stdin, substitute every listed word via `replace`, and write the
# result to stdout without adding a trailing newline.
print(regexp.sub(replace, sys.stdin.read()), end="")