da03 committed on
Commit
1f1d7aa
1 Parent(s): 1d42781
Files changed (1) hide show
  1. utils.py +46 -0
utils.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys, os
2
+ import uuid
3
+ import shutil
4
+ import subprocess
5
+
6
+
7
def normalize_formula(formula):
    """Normalize a LaTeX formula string using the external perl/node pipeline.

    The formula is written to a uniquely named scratch file in the current
    working directory and pushed through three stages:

    1. ``perl`` rewrites ``hskip<len><unit>`` into ``hspace{<len><unit>}``
       for the units cm, in, pt, mm and em.
    2. Carriage returns are replaced by spaces and every line is stripped.
    3. ``node preprocess_latex.js normalize`` processes the cleaned text; its
       output lines are then re-joined with single spaces between tokens.

    ``preprocess_latex.js`` is referenced by a relative path, so it has to be
    reachable from the current working directory.

    Args:
        formula: LaTeX source text to normalize.

    Returns:
        The normalized formula with leading/trailing whitespace removed.

    Raises:
        AssertionError: If the perl or the node command exits with a non-zero
            status. Raised explicitly (not via ``assert``) so the check is
            still enforced when Python runs with ``-O``.
    """
    # All scratch files share one random stem so concurrent calls do not clash.
    input_file = str(uuid.uuid4()) + '.tex'
    output_file = input_file + '.out'
    temp_file = output_file + '.tmp'

    try:
        with open(input_file, 'w') as fout:
            fout.write(formula)

        # Only locally generated UUID file names are interpolated into the
        # shell command; the formula text itself never reaches the shell.
        cmd = "perl -pe 's|hskip(.*?)(cm\\|in\\|pt\\|mm\\|em)|hspace{\\1\\2}|g' %s > %s"%(input_file, output_file)
        if subprocess.call(cmd, shell=True) != 0:
            raise AssertionError('perl hskip rewrite failed for %s' % input_file)

        # Replace carriage returns with spaces and trim each line before
        # handing the text to the node script.
        with open(output_file) as fin, open(temp_file, 'w') as fout:
            for line in fin:
                fout.write(line.replace('\r', ' ').strip() + '\n')  # delete \r

        cmd = "cat %s | node preprocess_latex.js %s > %s "%(temp_file, 'normalize', output_file)
        if subprocess.call(cmd, shell=True) != 0:
            raise AssertionError('preprocess_latex.js normalize failed for %s' % input_file)

        # Collapse every run of whitespace to a single space, line by line.
        with open(output_file) as fin:
            normalized_lines = [' '.join(line.split()) + '\n' for line in fin]
        return ''.join(normalized_lines).strip()
    finally:
        # Remove scratch files on success and on failure alike; some may not
        # exist yet if an early stage failed.
        for path in (input_file, output_file, temp_file):
            if os.path.exists(path):
                os.remove(path)