"""Gradio demo: text-to-text translation with SeamlessM4T (medium)."""

import gradio as gr
from transformers import AutoProcessor
from transformers import SeamlessM4TForTextToText

from lang_list import (
    LANGUAGE_NAME_TO_CODE,
    T2TT_TARGET_LANGUAGE_NAMES,
    TEXT_SOURCE_LANGUAGE_NAMES,
)

DEFAULT_TARGET_LANGUAGE = "English"

# Single source of truth for the checkpoint so the model and its processor
# can never drift apart.
MODEL_ID = "facebook/hf-seamless-m4t-medium"

# Loaded once at import time; every request reuses these objects.
model = SeamlessM4TForTextToText.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)


def _language_code(language_name: str, role: str) -> str:
    """Map a display language name to its model code or raise a UI error."""
    try:
        return LANGUAGE_NAME_TO_CODE[language_name]
    except KeyError as err:
        # gr.Error is shown to the user with this message instead of a
        # generic failure caused by an unhandled KeyError.
        raise gr.Error(f"Unsupported {role} language: {language_name!r}") from err


def run_t2tt(input_text: str, source_language: str, target_language: str) -> str:
    """Translate ``input_text`` from ``source_language`` to ``target_language``.

    Args:
        input_text: Text to translate.
        source_language: Display name of the input language; must be a key of
            ``LANGUAGE_NAME_TO_CODE``.
        target_language: Display name of the output language; must be a key of
            ``LANGUAGE_NAME_TO_CODE``.

    Returns:
        The decoded translation, or an empty string when ``input_text`` is
        empty or contains only whitespace.

    Raises:
        gr.Error: If either language name is not in ``LANGUAGE_NAME_TO_CODE``.
    """
    # Nothing to translate: skip tokenisation and generation entirely.
    if not input_text or not input_text.strip():
        return ""

    source_language_code = _language_code(source_language, "source")
    target_language_code = _language_code(target_language, "target")

    text_inputs = processor(
        text=input_text,
        src_lang=source_language_code,
        return_tensors="pt",
    )
    output_tokens = model.generate(**text_inputs, tgt_lang=target_language_code)
    # Only the first generated sequence is decoded.
    return processor.decode(output_tokens[0].tolist(), skip_special_tokens=True)


with gr.Blocks() as demo_t2tt:
    with gr.Row():
        with gr.Column():
            with gr.Group():
                input_text = gr.Textbox(label="Input text")
                with gr.Row():
                    source_language = gr.Dropdown(
                        label="Source language",
                        choices=TEXT_SOURCE_LANGUAGE_NAMES,
                        value="English",
                    )
                    target_language = gr.Dropdown(
                        label="Target language",
                        choices=T2TT_TARGET_LANGUAGE_NAMES,
                        value=DEFAULT_TARGET_LANGUAGE,
                    )
            btn = gr.Button("Translate")
        with gr.Column():
            output_text = gr.Textbox(label="Translated text")

    # Example rows are [input text, source language, target language].
    gr.Examples(
        examples=[
            [
                "The sinister destruction of the holy Akal Takht and the ruthless massacre of thousands of innocent pilgrims had unmasked the deep-seated hatred and animosity that the Indian Government had been nurturing against Sikhs ever since inde- pendence",
                "English",
                "Punjabi",
            ],
            [
                "It contains. much useful information about administrative, revenue, judicial and ecclesiastical activities in various areas which, it is hoped, would supplement the information available in official records.",
                "English",
                "Hindi",
            ],
            [
                "दुनिया में बहुत सी अलग-अलग भाषाएं हैं और उनमें अपने वर्ण और शब्दों का भंडार होता है. इसमें में कुछ उनके अपने शब्द होते हैं तो कुछ ऐसे भी हैं, जो दूसरी भाषाओं से लिए जाते हैं.",
                "Hindi",
                "Punjabi",
            ],
            [
                "ਸੂੂਬੇ ਦੇ ਕਈ ਜ਼ਿਲ੍ਹਿਆਂ ’ਚ ਬੁੱਧਵਾਰ ਸਵੇਰੇ ਸੰਘਣੀ ਧੁੰਦ ਛਾਈ ਰਹੀ ਤੇ ਤੇਜ਼ ਹਵਾਵਾਂ ਨੇ ਕਾਂਬਾ ਹੋਰ ਵਧਾ ਦਿੱਤਾ। ਸੱਤ ਸ਼ਹਿਰਾਂ ’ਚ ਦਿਨ ਦਾ ਤਾਪਮਾਨ ਦਸ ਡਿਗਰੀ ਸੈਲਸੀਅਸ ਦੇ ਆਸਪਾਸ ਰਿਹਾ। ਸੂਬੇ ’ਚ ਵੱਧ ਤੋਂ ਵੱਧ ਤਾਪਮਾਨ ’ਚ ਵੀ ਦਸ ਡਿਗਰੀ ਸੈਲਸੀਅਸ ਦੀ ਗਿਰਾਵਟ ਦਰਜ ਕੀਤੀ ਗਈ",
                "Punjabi",
                "English",
            ],
        ],
        inputs=[input_text, source_language, target_language],
        outputs=output_text,
        fn=run_t2tt,
        cache_examples=True,
        api_name=False,
    )

    # Pressing Enter in the textbox and clicking the button run the same
    # handler, exposed under the "t2tt" API name.
    gr.on(
        triggers=[input_text.submit, btn.click],
        fn=run_t2tt,
        inputs=[input_text, source_language, target_language],
        outputs=output_text,
        api_name="t2tt",
    )


if __name__ == "__main__":
    demo_t2tt.launch()