File size: 1,159 Bytes
4fb0bd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Convert tagged extractions into tab-separated triples.
#
# Reads the sentences in 'sample300_en.txt' (one per line) and assigns each
# its 1-based line number as an id.  Then reads 'output_extractions.txt',
# whose lines have the form "<sentence>\t<tagged extraction>\t<score>", and
# writes "<sentence id>\t<arg1>\t<rel>\t<arg2>" rows to
# 'compactIE_1.2_new.txt'.


def load_sentence_ids(path):
    """Return a dict mapping each line of *path* to its 1-based line number.

    The trailing newline is removed from each line before it is used as a
    key.  If the same sentence occurs more than once, the last occurrence
    wins.
    """
    sent2id = {}
    with open(path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f, start=1):
            sent2id[line.rstrip('\n')] = i
    return sent2id


def extract_tag(extraction, tag):
    """Return the stripped text between ``<tag>`` and ``</tag>``.

    Raises:
        ValueError: if the opening or the closing tag is not present in
            *extraction*.
    """
    open_tag = '<{}>'.format(tag)
    close_tag = '</{}>'.format(tag)
    start = extraction.index(open_tag) + len(open_tag)
    end = extraction.index(close_tag)
    return extraction[start:end].strip()


def _tag_or_empty(extraction, tag, label):
    """Return the content of *tag*, or "" (after reporting *label*) if absent."""
    try:
        return extract_tag(extraction, tag)
    except ValueError:
        print(label, extraction)
        return ""


def convert_line(line, sent2id):
    """Convert one extraction line into an output row.

    Args:
        line: a "<sentence>\\t<extraction>\\t<score>" line; a trailing
            newline is allowed.  The score is not used.
        sent2id: mapping from sentence text to sentence id.

    Returns:
        The row "<id>\\t<arg1>\\t<rel>\\t<arg2>" without a trailing newline,
        or None when the extraction has an ``<arg2>`` element whose content
        is empty (such rows are dropped).

    Raises:
        ValueError: if *line* does not consist of exactly three
            tab-separated fields.
        KeyError: if the sentence is not a key of *sent2id*.
    """
    fields = line.rstrip('\n').split('\t')
    if len(fields) != 3:
        raise ValueError(
            "expected 3 tab-separated fields, got {}: {!r}".format(
                len(fields), line))
    sentence, extraction, _score = fields
    sent_id = sent2id[sentence]

    arg1 = _tag_or_empty(extraction, 'arg1', "subject error!")
    rel = _tag_or_empty(extraction, 'rel', "predicate error!")
    try:
        arg2 = extract_tag(extraction, 'arg2')
    except ValueError:
        # A missing <arg2> is reported but the row is still emitted with an
        # empty object; only a present-but-empty <arg2> drops the row.
        print("object error!", extraction)
        arg2 = ""
    else:
        if arg2 == "":
            return None
    return "{}\t{}\t{}\t{}".format(sent_id, arg1, rel, arg2)


def main():
    """Run the conversion on the fixed input and output files."""
    sent2id = load_sentence_ids('sample300_en.txt')
    with open('output_extractions.txt', 'r', encoding='utf-8') as fin, \
            open('compactIE_1.2_new.txt', 'w', encoding='utf-8') as fout:
        for line in fin:
            if not line.rstrip('\n'):
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            row = convert_line(line, sent2id)
            if row is not None:
                print(row, file=fout)


if __name__ == "__main__":
    main()