# khulnasoft's picture
# Upload 108 files
# 4fb0bd1 verified
# Lookup table: sentence text (newline removed) -> its 1-based line number in
# the sample file. If a sentence occurs more than once, the last line number wins.
sent2id = {}
with open('sample300_en.txt', 'r') as sample_file:
    for line_no, raw_line in enumerate(sample_file, start=1):
        sent2id[raw_line.replace('\n', '')] = line_no
def _extract_field(extraction, tag, label):
    """Return the stripped text between ``<tag>`` and ``</tag>`` in *extraction*.

    If either the opening or the closing tag is absent, a
    "<label> error!" diagnostic is printed to stdout together with the
    offending extraction and ``None`` is returned, so the caller can tell a
    missing field apart from a field that is present but empty.
    """
    open_tag = '<{}>'.format(tag)
    close_tag = '</{}>'.format(tag)
    try:
        start = extraction.index(open_tag) + len(open_tag)
        end = extraction.index(close_tag)
    except ValueError:
        # str.index raises ValueError when the tag is not found; nothing else
        # is expected here, so do not swallow unrelated exceptions.
        print("{} error!".format(label), extraction)
        return None
    return extraction[start:end].strip()


# Convert tab-separated "sentence<TAB>extraction<TAB>score" lines into
# "sentence id<TAB>arg1<TAB>rel<TAB>arg2" lines. The sentence id is looked up in
# the module-level ``sent2id`` mapping; an unknown sentence raises KeyError.
with open('output_extractions.txt', 'r') as fin, open('compactIE_1.2_new.txt', 'w') as fout:
    for line in fin:
        if not line.strip():
            # Blank lines (e.g. a trailing one) carry no extraction.
            continue
        # The third field (score) is read but not used in the output.
        sentence, extraction, _score = line.split('\t')
        sentId = sent2id[sentence]

        arg1 = _extract_field(extraction, 'arg1', 'subject')
        rel = _extract_field(extraction, 'rel', 'predicate')
        arg2 = _extract_field(extraction, 'arg2', 'object')

        # An object that is present but empty drops the whole row; a missing
        # <arg2> tag (None) is still written, with an empty object column.
        if arg2 == "":
            continue

        print("{}\t{}\t{}\t{}".format(sentId, arg1 or "", rel or "", arg2 or ""), file=fout)