Jordan Myers committed on
Commit
0101c12
·
1 Parent(s): 5d5e348

more updates

Browse files
Files changed (2) hide show
  1. .gitignore +2 -0
  2. app.py +34 -73
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .venv
2
+ __pycache__
app.py CHANGED
@@ -2,7 +2,6 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
3
  import torch
4
 
5
- # this model was loaded from https://hf.co/models
6
  model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona")
7
  tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
8
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -38,62 +37,11 @@ def translate(text, src_lang, tgt_lang, candidates:int):
38
  outs = model.generate(**{**ins, **gen_args})
39
  output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)
40
 
41
- return output
42
-
43
- # app = gr.Interface(
44
- # fn=translate,
45
- # inputs=[
46
- # gr.components.Textbox(label="Text"),
47
- # gr.components.Dropdown(label="Source Language", choices=list(LANG_CODES.keys())),
48
- # gr.components.Dropdown(label="Target Language", choices=list(LANG_CODES.keys())),
49
- # gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=12, step=1)
50
- # ],
51
- # outputs=["text"],
52
- # examples=[
53
- # ["Welcome to my translation app.", "English", "toki pona", 3],
54
- # ["Its not always perfect, but its pretty okay!", "English", "toki pona", 3],
55
- # ["ilo pi ante toki ni li pona a!", "toki pona", "English", 3],
56
- # ["kijetesantakalu li pona", "toki pona", "English", 3],
57
- # ["mi li toki e toki pona", "toki pona", "toki pona", 3]
58
- # ],
59
- # cache_examples=False,
60
- # article="""
61
- # # A simple English / toki pona Neural Machine Translation App!
62
-
63
- # ### toki a! 💬
64
-
65
- # This is a simple english to toki pona / toki pona to english neural machine translation app.
66
-
67
- # Input your text to translate, a source language and target language, and desired number of return sequences!
68
-
69
- # ### Grammaticality / Regularization
70
- # English -> English and/or toki pona -> toki pona will result in some form of regularization.
71
-
72
- # This can approximate grammaticality, but it isn't always the best.
73
-
74
- # For example, "mi li toki e toki pona" [src: toki pona, tgt: toki pona] will result in ['mi toki e toki pona.', 'mi toki pona.', 'mi toki e toki pona']
75
- # (Thus, the ungrammatical "li" is dropped)
76
-
77
- # ### Model and Data
78
- # This app utilizes a fine-tuned version of Facebook/Meta AI's M2M100 418M param model.
79
-
80
- # By leveraging the pretrained weights of the massively multilingual M2M100 model,
81
- # we can jumpstart our transfer learning to accomplish machine translation for toki pona!
82
-
83
- # The model was fine-tuned on the English/toki pona bitexts found at https://tatoeba.org/
84
-
85
- # ### This app is a work in progress and obviously not all translations will be perfect.
86
- # In addition to parameter quantity and the hyper-parameters used while training,
87
- # the *quality of data* found on Tatoeba directly influences the perfomance of projects like this!
88
-
89
- # If you wish to contribute, please simply add high quality and diverse translations to Tatoeba!
90
- # """,
91
- # title="English / toki pona Translation"
92
- # )
93
 
94
  with gr.Blocks() as app:
95
- gr.Markdown("""
96
- # A simple English / toki pona Neural Machine Translation App!
97
 
98
  ### toki a! 💬
99
 
@@ -101,13 +49,15 @@ with gr.Blocks() as app:
101
 
102
  Input your text to translate, a source language and target language, and desired number of return sequences!
103
 
104
- ### Grammaticality / Regularization
105
- English -> English and/or toki pona -> toki pona will result in some form of regularization.
 
106
 
107
- This can approximate grammaticality, but it isn't always the best.
108
 
109
- For example, "mi li toki e toki pona" [src: toki pona, tgt: toki pona] will result in ['mi toki e toki pona.', 'mi toki pona.', 'mi toki e toki pona']
110
- (Thus, the ungrammatical "li" is dropped)
 
111
 
112
  ### Model and Data
113
  This app utilizes a fine-tuned version of Facebook/Meta AI's M2M100 418M param model.
@@ -121,19 +71,30 @@ with gr.Blocks() as app:
121
  In addition to parameter quantity and the hyper-parameters used while training,
122
  the *quality of data* found on Tatoeba directly influences the perfomance of projects like this!
123
 
124
- If you wish to contribute, please simply add high quality and diverse translations to Tatoeba!
125
  """
126
- )
127
- inputs=[
128
- gr.components.Textbox(label="Text"),
129
- gr.components.Dropdown(label="Source Language", choices=list(LANG_CODES.keys())),
130
- gr.components.Dropdown(label="Target Language", choices=list(LANG_CODES.keys())),
131
- gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=12, step=1)
132
- ]
133
-
134
- outputs = gr.Textbox()
135
-
136
- translate_btn = gr.Button("Translate! | o ante toki!")
137
- translate_btn.click(translate, inputs=inputs, outputs=outputs)
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  app.launch()
 
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
3
  import torch
4
 
 
5
  model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona")
6
  tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
7
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
37
  outs = model.generate(**{**ins, **gen_args})
38
  output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)
39
 
40
+ return '\n'.join(output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  with gr.Blocks() as app:
43
+ markdown="""
44
+ # A Simple English / toki pona Neural Machine Translation App!
45
 
46
  ### toki a! 💬
47
 
 
49
 
50
  Input your text to translate, a source language and target language, and desired number of return sequences!
51
 
52
+ ### Grammar Regularization
53
+ An interesting quirk of training a many-to-many translation model is that pseudo-grammar correction
54
+ can be achieved by translating *from* **language A** *to* **language A**
55
 
56
+ Remember, this can ***approximate*** grammaticality, but it isn't always the best.
57
 
58
+ For example, "mi li toki e toki pona" (Source Language: toki pona & Target Language: toki pona) will result in:
59
+ - ['mi toki e toki pona.', 'mi toki pona.', 'mi toki e toki pona']
60
+ - (Thus, the ungrammatical "li" is dropped)
61
 
62
  ### Model and Data
63
  This app utilizes a fine-tuned version of Facebook/Meta AI's M2M100 418M param model.
 
71
  In addition to parameter quantity and the hyper-parameters used while training,
72
  the *quality of data* found on Tatoeba directly influences the perfomance of projects like this!
73
 
74
+ If you wish to contribute, please add high quality and diverse translations to Tatoeba!
75
  """
76
+
77
+ with gr.Row():
78
+ gr.Markdown(markdown)
79
+ with gr.Column():
80
+ input_text = gr.components.Textbox(label="Input Text", value="Raccoons are fascinating creatures, but I prefer opossums.")
81
+ source_lang = gr.components.Dropdown(label="Source Language", value="English", choices=list(LANG_CODES.keys()))
82
+ target_lang = gr.components.Dropdown(label="Target Language", value="toki pona", choices=list(LANG_CODES.keys()))
83
+ return_seqs = gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=12, step=1)
84
+
85
+ inputs=[input_text, source_lang, target_lang, return_seqs]
86
+ outputs = gr.Textbox()
87
+
88
+ translate_btn = gr.Button("Translate! | o ante toki!")
89
+ translate_btn.click(translate, inputs=inputs, outputs=outputs)
90
+
91
+ gr.Examples(
92
+ [
93
+ ["Hello! How are you?", "English", "toki pona", 3],
94
+ ["toki a! ilo pi ante toki ni li pona!", "toki pona", "English", 3],
95
+ ["mi toki e toki pona", "toki pona", "toki pona", 3],
96
+ ],
97
+ inputs=inputs
98
+ )
99
 
100
  app.launch()