File size: 2,466 Bytes
084cf95
 
 
 
 
 
 
 
 
 
 
 
 
 
f8a90a5
 
 
 
 
 
 
 
084cf95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8a90a5
 
084cf95
 
 
 
 
f8a90a5
 
084cf95
 
 
 
f8a90a5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import gradio as gr


# When True, the UI is wired to an echo function instead of the real model.
DEBUG_UI = False

# Display name shown in the language dropdowns -> language code handed to
# the translation pipeline as ``src_lang`` / ``tgt_lang``.
# Insertion order is the order in which the names appear in the dropdowns.
LANGS = {
    "English": "eng_Latn",
    "Interslavic": "isv_Latn",
    # The Cyrillic-script Interslavic entry ("isv_Cyrl") is currently disabled.

    # East Slavic
    "Russian": "rus_Cyrl",
    "Belarusian": "bel_Cyrl",
    "Ukrainian": "ukr_Cyrl",

    # West Slavic
    "Polish": "pol_Latn",
    "Silesian": "szl_Latn",
    "Czech": "ces_Latn",
    "Slovak": "slk_Latn",

    # South Slavic
    "Slovenian": "slv_Latn",
    "Croatian": "hrv_Latn",
    "Bosnian": "bos_Latn",
    "Serbian": "srp_Cyrl",
    "Macedonian": "mkd_Cyrl",
    "Bulgarian": "bul_Cyrl",

    # Non-Slavic languages
    "Esperanto": "epo_Latn",
    "German": "deu_Latn",
    "French": "fra_Latn",
    "Spanish": "spa_Latn",
}


if DEBUG_UI:
    def translate(text, src_lang, tgt_lang):
        """Return ``text`` unchanged so the UI can be tried without loading a model."""
        return text

else:
    # Previous checkpoint, kept for reference:
    # model_name = 'Salavat/nllb-200-distilled-600M-finetuned-isv'
    model_name = 'Salavat/nllb-200-distilled-600M-finetuned-isv_v2'
    # Loaded once at import time and reused by every translate() call.
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    def get_lang(lang):
        """Return the language code for a display name from ``LANGS``.

        Values that are not keys of ``LANGS`` are returned unchanged, so a
        raw language code can be passed directly.
        """
        return LANGS.get(lang, lang)

    def translate(text, from_, to_):
        """Translate ``text`` line by line from the source to the target language.

        Args:
            text: Input text; it is split on ``'\\n'`` and every non-blank
                line is translated as a separate pipeline input.
            from_: Source language, either a key of ``LANGS`` or a raw code.
            to_: Target language, either a key of ``LANGS`` or a raw code.

        Returns:
            The translated lines joined with ``'\\n'``. Blank lines are
            copied through untouched; empty input yields an empty string.
        """
        if not text:
            return ''

        lines = text.split('\n')
        # A blank line carries nothing to translate, so it is never sent to
        # the model; it is re-inserted verbatim when the output is assembled.
        pending = [line for line in lines if line.strip()]
        if not pending:
            return text

        # The pipeline is built per call because the language pair is
        # supplied when it is constructed; model and tokenizer are reused.
        translation_pipeline = pipeline(
            "translation", model=model, tokenizer=tokenizer, max_length=400,
            src_lang=get_lang(from_), tgt_lang=get_lang(to_),
        )
        translated = iter(translation_pipeline(pending))
        return '\n'.join(
            next(translated)['translation_text'] if line.strip() else line
            for line in lines
        )


# Build the web UI: two language dropdowns, an input and an output text box,
# and a button that runs translate() on the current selections.
with gr.Blocks() as demo:
    gr.Markdown("# Interslavic translator via NLLB200")
    # Credit the Space this demo was adapted from.
    gr.Markdown("This is a modified version of the original [NLLB-Translator](https://huggingface.co/spaces/Narrativaai/NLLB-Translator) space")
    with gr.Row():
        # Dropdowns list the display names; translate() maps them to codes.
        lang_input = gr.components.Dropdown(label="From", choices=list(LANGS), value='English')
        lang_output = gr.components.Dropdown(label="To", choices=list(LANGS), value='Interslavic')
    # NOTE(review): `.style()` is reported as deprecated in newer Gradio
    # releases in favour of `gr.Row(equal_height=True)` - confirm against the
    # Gradio version this app is pinned to before changing it.
    with gr.Row().style(equal_height=True):
        text_input = gr.components.Textbox(label="Text", lines=5, placeholder="Your text")
        text_output = gr.components.Textbox(label="Result", lines=5, placeholder="Translation...")
    button = gr.Button("Translate")

    # Inputs are passed positionally: (text, source language, target language).
    button.click(translate, inputs=[text_input, lang_input, lang_output], outputs=text_output)

demo.launch()