Salavat committed on
Commit
c48dacd
1 Parent(s): 675e248

empty line hallucinations fix

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -40,7 +40,9 @@ else:
40
  lang2id['isv_Cyrl'] = tokenizer(['isv_Cyrl'], add_special_tokens=False)['input_ids'][0][0]
41
 
42
  def translate(text, from_, to_):
43
- inputs = tokenizer(text.split('\n'), return_tensors="pt", padding=True)
 
 
44
  inputs['input_ids'][:, 0] = lang2id[LANGS[from_]]
45
  translated_tokens = model.generate(**inputs, max_length=400, forced_bos_token_id=lang2id[LANGS[to_]])
46
  result = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
 
40
  lang2id['isv_Cyrl'] = tokenizer(['isv_Cyrl'], add_special_tokens=False)['input_ids'][0][0]
41
 
42
  def translate(text, from_, to_):
43
+ # empty line hallucinations fix
44
+ lines = [f'{line} ' for line in text.split('\n')]
45
+ inputs = tokenizer(lines, return_tensors="pt", padding=True)
46
  inputs['input_ids'][:, 0] = lang2id[LANGS[from_]]
47
  translated_tokens = model.generate(**inputs, max_length=400, forced_bos_token_id=lang2id[LANGS[to_]])
48
  result = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)