from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import gradio as gr

# When True, the model is not loaded and `translate` simply echoes its input,
# so the UI can be exercised without the model.
DEBUG_UI = False

# Display name shown in the dropdowns -> language code handed to the
# translation pipeline as `src_lang` / `tgt_lang`.
LANGS = {
    'English': 'eng_Latn',
    'Interslavic': 'isv_Latn',
    # 'Интерславик': 'isv_Cyrl',
    'Russian': 'rus_Cyrl',
    'Belarusian': 'bel_Cyrl',
    'Ukrainian': 'ukr_Cyrl',
    'Polish': 'pol_Latn',
    'Silesian': 'szl_Latn',
    'Czech': 'ces_Latn',
    'Slovak': 'slk_Latn',
    'Slovenian': 'slv_Latn',
    'Croatian': 'hrv_Latn',
    'Bosnian': 'bos_Latn',
    'Serbian': 'srp_Cyrl',
    'Macedonian': 'mkd_Cyrl',
    'Bulgarian': 'bul_Cyrl',
    'Esperanto': 'epo_Latn',
    'German': 'deu_Latn',
    'French': 'fra_Latn',
    'Spanish': 'spa_Latn',
}


def get_lang(lang):
    """Return the language code for a display name.

    Values that are not keys of ``LANGS`` (for example a raw language code)
    are returned unchanged.
    """
    return LANGS.get(lang, lang)


if DEBUG_UI:
    def translate(text, from_, to_):
        """Debug stand-in for the real translator: return ``text`` unchanged."""
        return text
else:
    # model_name = 'Salavat/nllb-200-distilled-600M-finetuned-isv'
    model_name = 'Salavat/nllb-200-distilled-600M-finetuned-isv_v2'

    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Built once and reused by every request; the language pair is supplied
    # per call instead of constructing a new pipeline on each button click.
    translation_pipeline = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        max_length=400,
    )

    def translate(text, from_, to_):
        """Translate ``text`` line by line from one language to another.

        Args:
            text: Input text; each ``'\\n'``-separated line is translated
                as a separate pipeline input.
            from_: Source language, either a key of ``LANGS`` or a raw
                language code.
            to_: Target language, either a key of ``LANGS`` or a raw
                language code.

        Returns:
            The translated lines joined with ``'\\n'``. Blank
            (whitespace-only) lines are copied through untouched, so
            paragraph breaks survive and nothing is generated from empty
            input.
        """
        lines = (text or '').split('\n')
        pending = [line for line in lines if line.strip()]
        if not pending:
            # Nothing to translate (empty or whitespace-only input).
            return '\n'.join(lines)

        results = translation_pipeline(
            pending,
            src_lang=get_lang(from_),
            tgt_lang=get_lang(to_),
        )
        # Results come back in input order; stitch them into the original
        # line layout, leaving blank lines where they were.
        translated = iter(item['translation_text'] for item in results)
        return '\n'.join(
            next(translated) if line.strip() else line
            for line in lines
        )


with gr.Blocks() as demo:
    gr.Markdown("# Interslavic translator via NLLB200")
    gr.Markdown("This is a modified version of the original [NLLB-Translator](https://huggingface.co/spaces/Narrativaai/NLLB-Translator) space")
    with gr.Row():
        lang_input = gr.components.Dropdown(label="From", choices=list(LANGS.keys()), value='English')
        lang_output = gr.components.Dropdown(label="To", choices=list(LANGS.keys()), value='Interslavic')
    # NOTE(review): `.style()` appears to be deprecated/removed in newer
    # Gradio releases in favour of `gr.Row(equal_height=True)`; confirm
    # against the Gradio version this app is pinned to before upgrading.
    with gr.Row().style(equal_height=True):
        text_input = gr.components.Textbox(label="Text", lines=5, placeholder="Your text")
        text_output = gr.components.Textbox(label="Result", lines=5, placeholder="Translation...")
    button = gr.Button("Translate")
    button.click(translate, inputs=[text_input, lang_input, lang_output], outputs=text_output)

demo.launch()