File size: 3,230 Bytes
95a5ee1
f28142c
95a5ee1
5aa2f07
95a5ee1
52bba6a
 
 
 
95a5ee1
e41acb8
 
3c434c0
e78a989
95a5ee1
 
 
 
 
 
 
 
 
 
 
 
14b1706
 
be3bdac
87667c1
 
86d654e
be3bdac
 
 
14b1706
273c538
 
f28142c
273c538
fd237ff
f28142c
fd237ff
f28142c
 
 
273c538
a0edb0d
 
 
f28142c
 
65ba3b7
f28142c
 
 
 
 
51b4b18
 
f28142c
51b4b18
 
f28142c
51b4b18
 
14b1706
 
 
 
be3bdac
266a47d
 
78d5868
 
 
14b1706
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import gradio as gr
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Hugging Face Hub repository ids of the translation models offered in the UI.
REPO_ID_NLLB = "facebook/nllb-200-distilled-600M"
REPO_ID_MARIANNMT_en = "mbarnig/MarianNMT-tatoeba-en-lb"
REPO_ID_MARIANNMT_lb = "mbarnig/MarianNMT-tatoeba-lb-en"
REPO_ID_T5MT5 = "mbarnig/T5-mt5-tatoeba-en-lb"

# Texts shown on the page: title, short description and the HTML article.
my_title = "🇬🇧 Mir iwwersetzen vun an op Lëtzebuergesch ! 🇫🇷"
my_description = (
    "English-Luxembourgish machine translation (MT) demo based on 3 "
    "open-source transformer models: Facebook-NLLB, Microsoft-MarianNMT "
    "& Google-T5/mt5."
)
my_article = (
    "<h3>User guide</h3>"
    "<p>1. Press the submit button to translate an english text with the "
    "default values. 2. Compare the result with the luxembourgish example. "
    "3. Select a model and a translation direction and enter your own text. "
    "Have fun !</p>"
    "<p>Go to <a href='https://www.web3.lu/'>Internet with a Brain</a> to "
    "read my french publication <a href='https://www.web3.lu/'>Das Küsschen "
    "und die Sonne stritten sich ...</a> about the history of machine "
    "translation in Luxembourg from 1975 until today.</p>"
)

# Text pre-filled in the input box.
default_input = (
    "The North Wind and the Sun were disputing which was the stronger, "
    "when a traveler came along wrapped in a warm cloak."
)

# Choices offered by the two radio groups.
TRANSLATION_MODELS = ["NLLB", "MarianNMT", "T5/mt5"]
TRANSLATION_DIRECTION = ["en -> lb", "lb -> en"]

# Placeholder; not referenced anywhere else in the visible part of the file.
EXAMPLE = "..."

# Input widgets, listed in the order their values are handed to the
# translation callback: source text, model choice, direction choice.
my_inputs = [
    gr.Textbox(
        label="Input",
        lines=5,
        value=default_input,
    ),
    gr.Radio(
        choices=TRANSLATION_MODELS,
        label="Translation Model",
        value="NLLB",
    ),
    gr.Radio(
        choices=TRANSLATION_DIRECTION,
        label="Translation Direction",
        value="en -> lb",
    ),
]

# Output widget that displays the text returned by the callback.
my_output = gr.Textbox(label="Translation", lines=5)

# Pipelines already built in this process, keyed by Hub repository id, so a
# model is loaded once instead of on every request.
_TRANSLATOR_CACHE = {}


def customization(myModel, direc):
    """Return the translation pipeline for the selected model and direction.

    Args:
        myModel: One of "NLLB", "MarianNMT" or "T5/mt5".
        direc: "en -> lb" or "lb -> en". Only MarianNMT uses it to pick a
            repository; NLLB and T5/mt5 each use a single repository.

    Returns:
        A ``transformers`` translation pipeline, or ``None`` (after printing
        a hint) when the model or the MarianNMT direction is not recognised.
        The NLLB pipeline is returned without languages configured; the
        caller must pass ``src_lang``/``tgt_lang`` when invoking it.
    """
    if myModel == "NLLB":
        repo_id = REPO_ID_NLLB
    elif myModel == "MarianNMT":
        if direc == "en -> lb":
            repo_id = REPO_ID_MARIANNMT_en
        elif direc == "lb -> en":
            repo_id = REPO_ID_MARIANNMT_lb
        else:
            print("Please select a Translation Direction !")
            return None
    elif myModel == "T5/mt5":
        # NOTE(review): the repo name suggests this checkpoint only covers
        # en -> lb; it is used for both directions, as before -- confirm.
        repo_id = REPO_ID_T5MT5
    else:
        print("Please select a Translation Model !")
        return None

    translator = _TRANSLATOR_CACHE.get(repo_id)
    if translator is None:
        translator = pipeline("translation", model=repo_id)
        _TRANSLATOR_CACHE[repo_id] = translator
    return translator


def iwwersetz(source_text, model, direc):
    """Translate ``source_text`` with the selected model and direction.

    Args:
        source_text: Text to translate.
        model: Model name as accepted by ``customization``.
        direc: "en -> lb"; any other value is treated as "lb -> en" for
            NLLB (MarianNMT validates the direction itself).

    Returns:
        The translated text. The input is returned unchanged when it is
        empty/blank or when no pipeline is available for the selection.
    """
    # Nothing to translate: skip the (expensive) model load entirely.
    if not source_text or not source_text.strip():
        return source_text

    translator = customization(model, direc)
    if translator is None:
        # Best effort: echo the input rather than fail the request.
        return source_text

    if model == "NLLB":
        # NLLB is multilingual and needs explicit FLORES-200 language codes.
        if direc == "en -> lb":
            src_lang, tgt_lang = "eng_Latn", "ltz_Latn"
        else:
            src_lang, tgt_lang = "ltz_Latn", "eng_Latn"
        result = translator(source_text, src_lang=src_lang, tgt_lang=tgt_lang)
    else:
        result = translator(source_text)

    # The translation pipeline returns a list with one dict per input text.
    return result[0]["translation_text"]
    
# Build the web UI: `iwwersetz` receives the values of the three input
# widgets and its return value is shown in the output textbox.
demo = gr.Interface(
    fn=iwwersetz,
    inputs=my_inputs,
    outputs=my_output,
    title=my_title,
    description=my_description,
    article=my_article,
    # Gradio documents the string modes "auto", "manual" and "never" for this
    # option; "never" disables flagging explicitly instead of passing a
    # deprecated boolean.
    allow_flagging="never",
)
demo.launch()