"""Convert tagged extractions into tab-separated triple rows.

Sentences are read from ``sample300_en.txt`` (one per line) and numbered
from 1.  Every line of ``output_extractions.txt`` is expected to hold three
tab-separated fields -- sentence, extraction, score -- where the extraction
marks its parts with ``<arg1>``, ``<rel>`` and ``<arg2>`` tags.  For each
line a row ``sentence_id<TAB>arg1<TAB>rel<TAB>arg2`` is written to
``compactIE_1.2_new.txt``.  The score field is read but not used.
"""

SENTENCES_PATH = 'sample300_en.txt'
EXTRACTIONS_PATH = 'output_extractions.txt'
OUTPUT_PATH = 'compactIE_1.2_new.txt'


def build_sentence_ids(lines):
    """Map each sentence to its 1-based position in *lines*.

    Newline characters are removed from every line before it is used as a
    key.  If the same sentence occurs more than once, the last position wins.
    """
    return {
        line.replace('\n', ''): number
        for number, line in enumerate(lines, start=1)
    }


def extract_tag(extraction, tag):
    """Return the stripped text between ``<tag>`` and ``</tag>``.

    Returns ``None`` when the opening or the closing tag is absent, so that
    callers can tell a missing tag apart from a present-but-empty one.
    """
    opening = '<{}>'.format(tag)
    closing = '</{}>'.format(tag)
    try:
        start = extraction.index(opening) + len(opening)
        end = extraction.index(closing)
    except ValueError:  # str.index raises ValueError when the tag is absent
        return None
    return extraction[start:end].strip()


def convert_line(line, sent2id):
    """Turn one extraction line into an output row.

    Args:
        line: ``sentence<TAB>extraction<TAB>score`` text, with or without
            the trailing newline.
        sent2id: mapping from sentence text to its id.

    Returns:
        The row ``id<TAB>arg1<TAB>rel<TAB>arg2``, or ``None`` when the
        ``<arg2>`` tags are present but enclose only whitespace (such
        extractions are dropped).

    Raises:
        ValueError: if *line* does not have exactly three fields.
        KeyError: if the sentence is not a key of *sent2id*.
    """
    fields = line.split('\t')
    if len(fields) != 3:
        raise ValueError(
            "expected 3 tab-separated fields, got {}: {!r}".format(
                len(fields), line))
    sentence, extraction, _score = fields
    sent_id = sent2id[sentence]

    arg1 = extract_tag(extraction, 'arg1')
    if arg1 is None:
        print("subject error!", extraction)
        arg1 = ""

    rel = extract_tag(extraction, 'rel')
    if rel is None:
        print("predicate error!", extraction)
        rel = ""

    arg2 = extract_tag(extraction, 'arg2')
    if arg2 is None:
        # Missing tags are reported but the row is still emitted with an
        # empty object; only a present-but-empty <arg2> drops the row.
        print("object error!", extraction)
        arg2 = ""
    elif arg2 == "":
        return None

    return "{}\t{}\t{}\t{}".format(sent_id, arg1, rel, arg2)


def main():
    """Run the conversion on the three fixed file paths."""
    # Explicit encoding so results do not depend on the platform locale.
    with open(SENTENCES_PATH, 'r', encoding='utf-8') as f:
        sent2id = build_sentence_ids(f)

    with open(EXTRACTIONS_PATH, 'r', encoding='utf-8') as fin, \
            open(OUTPUT_PATH, 'w', encoding='utf-8') as fout:
        for line in fin:
            # A blank line (e.g. at end of file) carries no extraction.
            if not line.rstrip('\r\n'):
                continue
            row = convert_line(line, sent2id)
            if row is not None:
                print(row, file=fout)


if __name__ == "__main__":
    main()