Engr-Saeed committed on
Commit
8a60c72
·
verified ·
1 Parent(s): 1dfef7d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import MarianMTModel, MarianTokenizer
3
+
4
# Display name shown in the UI -> two-letter language code used to look up
# translation models. (Diff-viewer residue removed so the block is valid Python.)
languages = {
    'English': 'en',
    'Urdu': 'ur',
    'French': 'fr',
    'Spanish': 'es',
    'German': 'de',
    'Chinese': 'zh',
    'Italian': 'it',
    'Russian': 'ru',
    'Japanese': 'ja',
    'Arabic': 'ar',
    'Hindi': 'hi',
}
18
+
19
# (source code, target code) -> Hugging Face model id that translates
# directly between the two. Every entry has English on one side; any other
# combination has no direct model here.
# NOTE(review): confirm each checkpoint is actually published on the Hub.
# In particular 'Helsinki-NLP/opus-mt-en-ja' may not exist (the
# English->Japanese checkpoint may be published under a different id such
# as 'opus-mt-en-jap') -- verify before relying on it.
language_pairs = {
    ('en', 'ur'): 'Helsinki-NLP/opus-mt-en-ur',
    ('ur', 'en'): 'Helsinki-NLP/opus-mt-ur-en',
    ('en', 'fr'): 'Helsinki-NLP/opus-mt-en-fr',
    ('fr', 'en'): 'Helsinki-NLP/opus-mt-fr-en',
    ('en', 'es'): 'Helsinki-NLP/opus-mt-en-es',
    ('es', 'en'): 'Helsinki-NLP/opus-mt-es-en',
    ('en', 'de'): 'Helsinki-NLP/opus-mt-en-de',
    ('de', 'en'): 'Helsinki-NLP/opus-mt-de-en',
    ('en', 'zh'): 'Helsinki-NLP/opus-mt-en-zh',
    ('zh', 'en'): 'Helsinki-NLP/opus-mt-zh-en',
    ('en', 'it'): 'Helsinki-NLP/opus-mt-en-it',
    ('it', 'en'): 'Helsinki-NLP/opus-mt-it-en',
    ('en', 'ru'): 'Helsinki-NLP/opus-mt-en-ru',
    ('ru', 'en'): 'Helsinki-NLP/opus-mt-ru-en',
    ('en', 'ja'): 'Helsinki-NLP/opus-mt-en-ja',
    ('ja', 'en'): 'Helsinki-NLP/opus-mt-ja-en',
    ('en', 'ar'): 'Helsinki-NLP/opus-mt-en-ar',
    ('ar', 'en'): 'Helsinki-NLP/opus-mt-ar-en',
    ('en', 'hi'): 'Helsinki-NLP/opus-mt-en-hi',
    ('hi', 'en'): 'Helsinki-NLP/opus-mt-hi-en',
    # Add more pairs as available
}
43
+
44
@st.cache_resource
def load_model(src_lang, tgt_lang):
    """Load the MarianMT model and tokenizer for a direct language pair.

    The result is cached with ``st.cache_resource`` so the checkpoint is
    instantiated once per (src_lang, tgt_lang) instead of on every Streamlit
    rerun / button click. (Requires a Streamlit version that provides
    ``st.cache_resource``.)

    Args:
        src_lang: Source language code, e.g. ``'en'``.
        tgt_lang: Target language code, e.g. ``'fr'``.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        ValueError: If ``language_pairs`` has no entry for the pair.
    """
    model_name = language_pairs.get((src_lang, tgt_lang))
    if not model_name:
        raise ValueError(f"No available model for {src_lang} to {tgt_lang}")

    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer
52
+
53
def translate(text, src_lang, tgt_lang):
    """Translate ``text`` using the direct model for (src_lang, tgt_lang).

    Args:
        text: The string to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The decoded translation with special tokens removed.

    Raises:
        ValueError: Propagated from ``load_model`` when no direct model is
            registered for the pair.
    """
    model, tokenizer = load_model(src_lang, tgt_lang)
    # Call the tokenizer directly (rather than ``encode``) so that an
    # attention mask is produced alongside the input ids, and truncate so
    # over-long input is cut to the model's maximum length instead of
    # failing inside the model. Text beyond that limit is not translated.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
58
+
59
def translate_chain(text, src_lang, tgt_lang):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang`` via English.

    Used when no direct model exists for the pair: the text is first
    translated into English (unless it already is English) and then from
    English into the target language (unless the target is English).

    Args:
        text: The string to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The translated string. When source and target are the same language
        the input is returned unchanged.
    """
    # Same language on both sides: nothing to do. Without this guard a
    # non-English pair such as ('es', 'es') would be round-tripped through
    # English and come back altered.
    if src_lang == tgt_lang:
        return text

    if src_lang != 'en':
        text = translate(text, src_lang, 'en')
    if tgt_lang != 'en':
        text = translate(text, 'en', tgt_lang)
    return text
65
+
66
def translate_ui(text, source_language, target_language):
    """Translate ``text`` between two languages chosen by display name.

    Args:
        text: The string to translate.
        source_language: A key of ``languages`` (e.g. ``'French'``).
        target_language: A key of ``languages``.

    Returns:
        The translated string; the input unchanged when both languages are
        the same.

    Raises:
        KeyError: If either display name is not in ``languages``.
    """
    src_lang = languages[source_language]
    tgt_lang = languages[target_language]

    # Identical languages need no model at all.
    if src_lang == tgt_lang:
        return text

    # Decide by table lookup instead of catching ValueError: a ValueError
    # raised during tokenization or generation would otherwise be mistaken
    # for "no direct model" and trigger a redundant second attempt.
    if (src_lang, tgt_lang) in language_pairs:
        return translate(text, src_lang, tgt_lang)
    return translate_chain(text, src_lang, tgt_lang)
74
+
75
# Streamlit App UI
st.title("Multilingual Translator")
st.write("Translate text between various languages including Urdu, French, Spanish, and more.")

# Input text
text = st.text_area("Enter text to translate", height=100)

# Source and Target Languages
source_language = st.selectbox("Select Source Language", list(languages))
target_language = st.selectbox("Select Target Language", list(languages))

# Translate Button
if st.button("Translate"):
    if text.strip():
        try:
            # Loading a checkpoint and generating can take a while; show
            # progress instead of a frozen page.
            with st.spinner("Translating..."):
                translation = translate_ui(text, source_language, target_language)
        except Exception as exc:
            # Top-level UI boundary: report download/inference failures to
            # the user rather than surfacing a raw traceback.
            st.error(f"Translation failed: {exc}")
        else:
            st.text_area("Translated Text", translation, height=100)
    else:
        st.warning("Please enter text to translate.")

# About Section
st.sidebar.title("About")
st.sidebar.info(
    """
This app allows you to translate text between multiple languages using the MarianMT model from Hugging Face's Helsinki-NLP collection.
"""
)