fargerm committed on
Commit
82debbb
1 Parent(s): cce785c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -15,7 +15,7 @@ MODELS = {
15
  'Bengali': "Helsinki-NLP/opus-mt-en-bn",
16
  }
17
 
18
- # Manually defined language codes for different language models
19
  LANG_CODE_MAP = {
20
  'French': 'fr',
21
  'Spanish': 'es',
@@ -38,16 +38,16 @@ def translate_text(text, target_lang):
38
  model = MarianMTModel.from_pretrained(model_name)
39
  tokenizer = MarianTokenizer.from_pretrained(model_name)
40
 
41
- # Encode the text and prepare it for translation
42
  encoded_text = tokenizer(text, return_tensors="pt")
43
 
44
- # Get the language code for forced_bos_token_id
45
  lang_code = LANG_CODE_MAP.get(target_lang)
46
  if not lang_code:
47
  return "Error: Language code not found."
48
 
49
  # Translate text
50
- translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.lang_code_to_id.get(lang_code))
51
 
52
  # Decode the translated text
53
  translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
 
15
  'Bengali': "Helsinki-NLP/opus-mt-en-bn",
16
  }
17
 
18
+ # Language codes for different models
19
  LANG_CODE_MAP = {
20
  'French': 'fr',
21
  'Spanish': 'es',
 
38
  model = MarianMTModel.from_pretrained(model_name)
39
  tokenizer = MarianTokenizer.from_pretrained(model_name)
40
 
41
+ # Encode the text
42
  encoded_text = tokenizer(text, return_tensors="pt")
43
 
44
+ # Language code for forced_bos_token_id
45
  lang_code = LANG_CODE_MAP.get(target_lang)
46
  if not lang_code:
47
  return "Error: Language code not found."
48
 
49
  # Translate text
50
+ translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.convert_tokens_to_ids(lang_code))
51
 
52
  # Decode the translated text
53
  translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)