Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ MODELS = {
|
|
15 |
'Bengali': "Helsinki-NLP/opus-mt-en-bn",
|
16 |
}
|
17 |
|
18 |
-
#
|
19 |
LANG_CODE_MAP = {
|
20 |
'French': 'fr',
|
21 |
'Spanish': 'es',
|
@@ -38,16 +38,16 @@ def translate_text(text, target_lang):
|
|
38 |
model = MarianMTModel.from_pretrained(model_name)
|
39 |
tokenizer = MarianTokenizer.from_pretrained(model_name)
|
40 |
|
41 |
-
# Encode the text
|
42 |
encoded_text = tokenizer(text, return_tensors="pt")
|
43 |
|
44 |
-
#
|
45 |
lang_code = LANG_CODE_MAP.get(target_lang)
|
46 |
if not lang_code:
|
47 |
return "Error: Language code not found."
|
48 |
|
49 |
# Translate text
|
50 |
-
translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.
|
51 |
|
52 |
# Decode the translated text
|
53 |
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
|
|
15 |
'Bengali': "Helsinki-NLP/opus-mt-en-bn",
|
16 |
}
|
17 |
|
18 |
+
# Language codes for different models
|
19 |
LANG_CODE_MAP = {
|
20 |
'French': 'fr',
|
21 |
'Spanish': 'es',
|
|
|
38 |
model = MarianMTModel.from_pretrained(model_name)
|
39 |
tokenizer = MarianTokenizer.from_pretrained(model_name)
|
40 |
|
41 |
+
# Encode the text
|
42 |
encoded_text = tokenizer(text, return_tensors="pt")
|
43 |
|
44 |
+
# Language code for forced_bos_token_id
|
45 |
lang_code = LANG_CODE_MAP.get(target_lang)
|
46 |
if not lang_code:
|
47 |
return "Error: Language code not found."
|
48 |
|
49 |
# Translate text
|
50 |
+
translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.convert_tokens_to_ids(lang_code))
|
51 |
|
52 |
# Decode the translated text
|
53 |
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|