Initial commit
This commit is contained in:
12
scripts/detex-languagetool.py
Executable file
12
scripts/detex-languagetool.py
Executable file
@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# Directory containing this script; detex.py is looked up next to it.
dir_path = os.path.dirname(os.path.realpath(__file__))

# Usage: detex-languagetool.py [LANGUAGETOOL OPTIONS...] FILE
#
# Equivalent to `cat FILE | detex.py | languagetool OPTIONS`, but built from
# argument lists instead of a shell string, so file names and options that
# contain spaces or shell metacharacters are passed through verbatim and
# are never interpreted by a shell.
with open(sys.argv[-1], 'rb') as tex_file:
    detex = subprocess.Popen([os.path.join(dir_path, 'detex.py')],
                             stdin=tex_file, stdout=subprocess.PIPE)
    languagetool = subprocess.Popen(['languagetool'] + sys.argv[1:-1],
                                    stdin=detex.stdout)
    # Close our copy of the pipe so that detex.py is the only writer and
    # gets SIGPIPE if languagetool exits before reading everything.
    detex.stdout.close()
    languagetool.wait()
    detex.wait()
|
81
scripts/detex.py
Executable file
81
scripts/detex.py
Executable file
@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
|
||||
def swallow(match):
    """Return a run of spaces exactly as long as the text *match* covers.

    Intended as an ``re.sub`` replacement callback: the matched text is
    blanked out instead of removed, so the surrounding text keeps its
    position.
    """
    begin, end = match.span()
    return ''.ljust(end - begin)
|
||||
|
||||
|
||||
def swallow_command(match):
    """Blank out a ``\\command{argument}`` wrapper but keep its argument.

    Expects group 1 of *match* to be the command name (without the
    backslash) and group 2 to be the argument text.  The backslash, the
    command name and the opening brace become spaces, the argument is kept
    as is, and one trailing space stands in for the closing brace.
    """
    command, argument = match.group(1, 2)
    # backslash + command name + opening brace
    lead_width = len(command) + 2
    return ''.join((' ' * lead_width, argument, ' '))
|
||||
|
||||
|
||||
def _detex(text):
    """Return *text* with LaTeX markup blanked out, keeping its length.

    Markup is overwritten with spaces (or with a plain-text stand-in that
    is padded with spaces) instead of being deleted, so the result has
    exactly as many characters as the input.
    """

    # \hyp / \fshyp: join the word parts with a plain hyphen.  The compound
    # gets shorter, so it is left-padded to the length of the match.
    # Chains such as a\hyp{}b\hyp{}c are converted in one go.
    def replace_hyphenation(match):
        original = match.group(0)
        compound = re.sub(r'\\(?:fs)?hyp\{\}', '-', original)
        return ' ' * (len(original) - len(compound)) + compound
    text = re.sub(r'\w+(?:\\(?:fs)?hyp\{\}\w+)+', replace_hyphenation, text)

    # glossary entries: show the label as words, pluralised for the *pl
    # commands and capitalised when the command name starts upper-case.
    def replace_glossary(match):
        command = match.group(1)
        term = match.group(2).replace('-', ' ')
        if command.endswith('pl'):
            term += 's'
        if command[0].isupper():
            term = term[0].upper() + term[1:]
        # Right-align the term in front of one blank that stands in for the
        # closing brace; the padding absorbs the backslash, the command
        # name, the opening brace and (for plurals) the extra 's'.
        padding = len(match.group(0)) - len(term) - 1
        return ' ' * padding + term + ' '
    text = re.sub(r'\\((?:newdef)?[gG]ls(?:pl)?){((?:\w+-?)+?)}',
                  replace_glossary, text)

    # acronyms: keep the label, blank the command around it
    text = re.sub(r'\\([aA]cr.*?){(.+?)}', swallow_command, text)

    # remove keypoints
    text = re.sub(r'\\keypoint\{.*?\}', swallow, text)

    # remove autocites
    text = re.sub(r'~?\\[aA]utocite(?:\[.+?\])?\{.*?\}', swallow, text)

    # Replace textcites by a placeholder author phrase.  The shortest
    # possible match (`\textcite{}`) is exactly as long as the template,
    # so the padding is never negative.
    def replace_textcite(match):
        template = 'Foo and Bar'
        return template + ' ' * (len(match.group(0)) - len(template))
    text = re.sub(r'\\[tT]extcite\{(.*?)\}', replace_textcite, text)

    # citesoftware
    text = re.sub(r'\\(citesoftware)\{(.*?)\}', swallow_command, text)

    # Remove common surrounding markup
    # NOTE(review): 'texttt' is listed twice and 'texthtt' looks like a
    # typo for another command -- confirm the intended command list.
    text = re.sub(r'\\(emph|texttt|textit|texttt|texthtt)\{(.*?)\}',
                  swallow_command, text)

    # Expand abbreviation macros; each replacement is as long as its macro.
    text = re.sub(r'\\eg\b', 'eg.', text)
    text = re.sub(r'\\cf\b', 'cf.', text)
    text = re.sub(r'\\ie\b', 'ie.', text)

    # references
    text = re.sub(r'\\([vV]?ref)\{(.*?)\}', swallow_command, text)

    # Blank comments up to the end of the line.  A '%' directly preceded by
    # a backslash is an escaped percent sign and is left alone.
    text = re.sub(r'(?<!\\)%.*', swallow, text)

    return text


def main():
    """Read LaTeX from stdin and print the de-TeXed text to stdout."""
    text = sys.stdin.read()
    detexed = _detex(text)

    print(detexed)

    # do not move things around too much
    assert len(detexed) == len(text), 'detex changed the length of the text'


if __name__ == '__main__':
    main()
|
Reference in New Issue
Block a user