sent2id = dict() with open('sample300_en.txt', 'r') as f: i = 1 for line in f.readlines(): sent2id[line.replace('\n', '')] = i i += 1 with open('output_extractions.txt', 'r') as fin, open('compactIE_1.2_new.txt', 'w') as fout: for line in fin.readlines(): sentence, extraction, score = line.split('\t') sentId = sent2id[sentence] try: arg1 = extraction[extraction.index('') + 6:extraction.index('')] arg1 = arg1.strip() except: print("subject error!", extraction) arg1 = "" try: rel = extraction[extraction.index('') + 5:extraction.index('')] rel = rel.strip() except: print("predicate error!", extraction) rel = "" try: arg2 = extraction[extraction.index('') + 6:extraction.index('')] arg2 = arg2.strip() if arg2 == "": continue except: print("object error!", extraction) arg2 = "" print("{}\t{}\t{}\t{}".format(sentId, arg1, rel, arg2), file=fout)