|
import json |
|
from tqdm import tqdm |
|
|
|
# Suffix spliced into every input/output file name used by this script
# (e.g. "dset<lang>.txt"); the empty string selects the un-suffixed files.
lang = ""

# Load the prompt→response pairs, one pair per line. Each line is lower-cased
# and split on "→"; literal backslash-n sequences are then expanded into real
# newlines. Any field after the second one is ignored.
# The encoding is pinned because the separator is a non-ASCII character and
# the platform default encoding is not guaranteed to decode it.
# NOTE(review): assumes the dataset file is UTF-8 encoded — confirm.
with open(f"dset{lang}.txt", "r", encoding="utf-8") as f:
    lines = []
    for line_no, raw in enumerate(tqdm(f.readlines()), start=1):
        fields = raw.rstrip("\n").lower().split("→")
        if len(fields) < 2:
            if not fields[0].strip():
                # Blank line (typically a trailing newline at end of file).
                continue
            # Fail with the offending location instead of a bare IndexError.
            raise ValueError(
                f"dset{lang}.txt:{line_no}: expected 'prompt→response', got {raw!r}"
            )
        lines.append(
            (fields[0].replace("\\n", "\n"), fields[1].replace("\\n", "\n"))
        )
|
|
|
# Collect the distinct responses in first-seen order; a response's position in
# this list is the integer id it is referred to by later in the script.
# dict.fromkeys de-duplicates via hashing while preserving insertion order,
# replacing a full list scan per sample (quadratic on large datasets).
responses = list(dict.fromkeys(pair[1] for pair in tqdm(lines)))
|
|
|
# Map every prompt to the index of its response in `responses`.
# The position table is built once so each sample is a constant-time lookup
# rather than a linear `list.index` scan. setdefault keeps the FIRST position
# of a value, which is exactly what `list.index` would return.
response_ids = {}
for position, response in enumerate(responses):
    response_ids.setdefault(response, position)

# A prompt that occurs more than once keeps the response of its last occurrence.
dset = {sample[0]: response_ids[sample[1]] for sample in tqdm(lines)}
|
|
|
# Persist the prompt → response-id mapping as JSON.
# ensure_ascii=False emits non-ASCII characters verbatim, so the file encoding
# is pinned to UTF-8 instead of relying on the platform default, which may be
# unable to encode them.
with open(f"dataset{lang}.json", "w", encoding="utf-8") as f:
    json.dump(dset, f, ensure_ascii=False)
|
|
|
# Write the response table, one entry per write followed by "\n", in the same
# order as the list (so entry order matches the ids stored in the dataset).
# The encoding is pinned to UTF-8 so non-ASCII responses are written
# identically on every platform.
# NOTE(review): responses can contain real newlines (escaped "\n" sequences are
# expanded when the dataset is loaded), in which case one response spans
# several lines of this file — confirm the reader copes with that.
with open(f"responses{lang}.txt", "w", encoding="utf-8") as f:
    f.writelines(response + "\n" for response in tqdm(responses))
|
|
|
|