tiedeman committed
Commit 32ad5c9
1 Parent(s): fa6d529

Initial commit

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.spm filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,1259 @@
+ ---
+ library_name: transformers
+ language:
+ - chm
+ - de
+ - en
+ - es
+ - et
+ - fi
+ - fkv
+ - fr
+ - hu
+ - izh
+ - krl
+ - kv
+ - liv
+ - mdf
+ - mrj
+ - myv
+ - pt
+ - se
+ - sma
+ - smn
+ - udm
+ - vep
+ - vot
+
+ tags:
+ - translation
+ - opus-mt-tc-bible
+
+ license: apache-2.0
+ model-index:
+ - name: opus-mt-tc-bible-big-fiu-deu_eng_fra_por_spa
+   results:
+   - task:
+       name: Translation est-deu
+       type: translation
+       args: est-deu
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: est-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 26.3
+     - name: chr-F
+       type: chrf
+       value: 0.55825
+   - task:
+       name: Translation est-eng
+       type: translation
+       args: est-eng
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: est-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 35.4
+     - name: chr-F
+       type: chrf
+       value: 0.62404
+   - task:
+       name: Translation est-fra
+       type: translation
+       args: est-fra
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: est-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 31.7
+     - name: chr-F
+       type: chrf
+       value: 0.58580
+   - task:
+       name: Translation est-por
+       type: translation
+       args: est-por
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: est-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 27.3
+     - name: chr-F
+       type: chrf
+       value: 0.55070
+   - task:
+       name: Translation est-spa
+       type: translation
+       args: est-spa
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: est-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.5
+     - name: chr-F
+       type: chrf
+       value: 0.50188
+   - task:
+       name: Translation fin-deu
+       type: translation
+       args: fin-deu
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: fin-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 24.0
+     - name: chr-F
+       type: chrf
+       value: 0.54281
+   - task:
+       name: Translation fin-eng
+       type: translation
+       args: fin-eng
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: fin-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 33.1
+     - name: chr-F
+       type: chrf
+       value: 0.60642
+   - task:
+       name: Translation fin-fra
+       type: translation
+       args: fin-fra
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: fin-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 30.5
+     - name: chr-F
+       type: chrf
+       value: 0.57540
+   - task:
+       name: Translation fin-por
+       type: translation
+       args: fin-por
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: fin-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 27.4
+     - name: chr-F
+       type: chrf
+       value: 0.55497
+   - task:
+       name: Translation fin-spa
+       type: translation
+       args: fin-spa
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: fin-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.4
+     - name: chr-F
+       type: chrf
+       value: 0.49847
+   - task:
+       name: Translation hun-deu
+       type: translation
+       args: hun-deu
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: hun-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 25.1
+     - name: chr-F
+       type: chrf
+       value: 0.55180
+   - task:
+       name: Translation hun-eng
+       type: translation
+       args: hun-eng
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: hun-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 34.0
+     - name: chr-F
+       type: chrf
+       value: 0.61466
+   - task:
+       name: Translation hun-fra
+       type: translation
+       args: hun-fra
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: hun-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 30.6
+     - name: chr-F
+       type: chrf
+       value: 0.57670
+   - task:
+       name: Translation hun-por
+       type: translation
+       args: hun-por
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: hun-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 28.9
+     - name: chr-F
+       type: chrf
+       value: 0.56510
+   - task:
+       name: Translation hun-spa
+       type: translation
+       args: hun-spa
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: hun-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.3
+     - name: chr-F
+       type: chrf
+       value: 0.49681
+   - task:
+       name: Translation est-deu
+       type: translation
+       args: est-deu
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: est deu devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 25.7
+     - name: chr-F
+       type: chrf
+       value: 0.55353
+   - task:
+       name: Translation est-eng
+       type: translation
+       args: est-eng
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: est eng devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 34.7
+     - name: chr-F
+       type: chrf
+       value: 0.61930
+   - task:
+       name: Translation est-fra
+       type: translation
+       args: est-fra
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: est fra devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 31.3
+     - name: chr-F
+       type: chrf
+       value: 0.58199
+   - task:
+       name: Translation est-por
+       type: translation
+       args: est-por
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: est por devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 26.5
+     - name: chr-F
+       type: chrf
+       value: 0.54388
+   - task:
+       name: Translation fin-eng
+       type: translation
+       args: fin-eng
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: fin eng devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 32.2
+     - name: chr-F
+       type: chrf
+       value: 0.59914
+   - task:
+       name: Translation fin-por
+       type: translation
+       args: fin-por
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: fin por devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 27.1
+     - name: chr-F
+       type: chrf
+       value: 0.55156
+   - task:
+       name: Translation hun-eng
+       type: translation
+       args: hun-eng
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: hun eng devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 33.5
+     - name: chr-F
+       type: chrf
+       value: 0.61198
+   - task:
+       name: Translation hun-fra
+       type: translation
+       args: hun-fra
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: hun fra devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 30.8
+     - name: chr-F
+       type: chrf
+       value: 0.57776
+   - task:
+       name: Translation hun-por
+       type: translation
+       args: hun-por
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: hun por devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 28.4
+     - name: chr-F
+       type: chrf
+       value: 0.56263
+   - task:
+       name: Translation hun-spa
+       type: translation
+       args: hun-spa
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: hun spa devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 20.7
+     - name: chr-F
+       type: chrf
+       value: 0.49140
+   - task:
+       name: Translation est-deu
+       type: translation
+       args: est-deu
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: est-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.4
+     - name: chr-F
+       type: chrf
+       value: 0.51377
+   - task:
+       name: Translation est-eng
+       type: translation
+       args: est-eng
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: est-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 29.9
+     - name: chr-F
+       type: chrf
+       value: 0.58358
+   - task:
+       name: Translation est-fra
+       type: translation
+       args: est-fra
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: est-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 24.9
+     - name: chr-F
+       type: chrf
+       value: 0.52713
+   - task:
+       name: Translation est-por
+       type: translation
+       args: est-por
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: est-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 22.2
+     - name: chr-F
+       type: chrf
+       value: 0.50745
+   - task:
+       name: Translation est-spa
+       type: translation
+       args: est-spa
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: est-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 27.5
+     - name: chr-F
+       type: chrf
+       value: 0.54304
+   - task:
+       name: Translation fin-deu
+       type: translation
+       args: fin-deu
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: fin-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 19.8
+     - name: chr-F
+       type: chrf
+       value: 0.50282
+   - task:
+       name: Translation fin-eng
+       type: translation
+       args: fin-eng
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: fin-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 26.3
+     - name: chr-F
+       type: chrf
+       value: 0.55545
+   - task:
+       name: Translation fin-fra
+       type: translation
+       args: fin-fra
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: fin-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 22.9
+     - name: chr-F
+       type: chrf
+       value: 0.50946
+   - task:
+       name: Translation fin-por
+       type: translation
+       args: fin-por
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: fin-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.3
+     - name: chr-F
+       type: chrf
+       value: 0.50404
+   - task:
+       name: Translation fin-spa
+       type: translation
+       args: fin-spa
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: fin-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 25.5
+     - name: chr-F
+       type: chrf
+       value: 0.52641
+   - task:
+       name: Translation hun-deu
+       type: translation
+       args: hun-deu
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: hun-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 18.5
+     - name: chr-F
+       type: chrf
+       value: 0.49322
+   - task:
+       name: Translation hun-eng
+       type: translation
+       args: hun-eng
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: hun-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 23.3
+     - name: chr-F
+       type: chrf
+       value: 0.52964
+   - task:
+       name: Translation hun-fra
+       type: translation
+       args: hun-fra
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: hun-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.8
+     - name: chr-F
+       type: chrf
+       value: 0.49800
+   - task:
+       name: Translation hun-por
+       type: translation
+       args: hun-por
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: hun-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 20.5
+     - name: chr-F
+       type: chrf
+       value: 0.48941
+   - task:
+       name: Translation hun-spa
+       type: translation
+       args: hun-spa
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: hun-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 24.2
+     - name: chr-F
+       type: chrf
+       value: 0.51123
+   - task:
+       name: Translation est-deu
+       type: translation
+       args: est-deu
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: est-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 53.9
+     - name: chr-F
+       type: chrf
+       value: 0.69451
+   - task:
+       name: Translation est-eng
+       type: translation
+       args: est-eng
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: est-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 58.2
+     - name: chr-F
+       type: chrf
+       value: 0.72437
+   - task:
+       name: Translation fin-deu
+       type: translation
+       args: fin-deu
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: fin-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 47.3
+     - name: chr-F
+       type: chrf
+       value: 0.66025
+   - task:
+       name: Translation fin-eng
+       type: translation
+       args: fin-eng
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: fin-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 53.7
+     - name: chr-F
+       type: chrf
+       value: 0.69685
+   - task:
+       name: Translation fin-fra
+       type: translation
+       args: fin-fra
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: fin-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 48.3
+     - name: chr-F
+       type: chrf
+       value: 0.65900
+   - task:
+       name: Translation fin-por
+       type: translation
+       args: fin-por
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: fin-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 54.0
+     - name: chr-F
+       type: chrf
+       value: 0.72250
+   - task:
+       name: Translation fin-spa
+       type: translation
+       args: fin-spa
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: fin-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 52.1
+     - name: chr-F
+       type: chrf
+       value: 0.69600
+   - task:
+       name: Translation hun-deu
+       type: translation
+       args: hun-deu
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: hun-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 41.1
+     - name: chr-F
+       type: chrf
+       value: 0.62418
+   - task:
+       name: Translation hun-eng
+       type: translation
+       args: hun-eng
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: hun-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 48.7
+     - name: chr-F
+       type: chrf
+       value: 0.65626
+   - task:
+       name: Translation hun-fra
+       type: translation
+       args: hun-fra
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: hun-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 50.3
+     - name: chr-F
+       type: chrf
+       value: 0.66840
+   - task:
+       name: Translation hun-por
+       type: translation
+       args: hun-por
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: hun-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 43.1
+     - name: chr-F
+       type: chrf
+       value: 0.65281
+   - task:
+       name: Translation hun-spa
+       type: translation
+       args: hun-spa
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: hun-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 48.7
+     - name: chr-F
+       type: chrf
+       value: 0.67467
+   - task:
+       name: Translation multi-multi
+       type: translation
+       args: multi-multi
+     dataset:
+       name: tatoeba-test-v2020-07-28-v2023-09-26
+       type: tatoeba_mt
+       args: multi-multi
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 44.6
+     - name: chr-F
+       type: chrf
+       value: 0.63895
+   - task:
+       name: Translation hun-deu
+       type: translation
+       args: hun-deu
+     dataset:
+       name: newstest2008
+       type: wmt-2008-news
+       args: hun-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 19.0
+     - name: chr-F
+       type: chrf
+       value: 0.50164
+   - task:
+       name: Translation hun-eng
+       type: translation
+       args: hun-eng
+     dataset:
+       name: newstest2008
+       type: wmt-2008-news
+       args: hun-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 20.4
+     - name: chr-F
+       type: chrf
+       value: 0.49802
+   - task:
+       name: Translation hun-fra
+       type: translation
+       args: hun-fra
+     dataset:
+       name: newstest2008
+       type: wmt-2008-news
+       args: hun-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.6
+     - name: chr-F
+       type: chrf
+       value: 0.51012
+   - task:
+       name: Translation hun-spa
+       type: translation
+       args: hun-spa
+     dataset:
+       name: newstest2008
+       type: wmt-2008-news
+       args: hun-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 22.3
+     - name: chr-F
+       type: chrf
+       value: 0.50719
+   - task:
+       name: Translation hun-deu
+       type: translation
+       args: hun-deu
+     dataset:
+       name: newstest2009
+       type: wmt-2009-news
+       args: hun-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 18.6
+     - name: chr-F
+       type: chrf
+       value: 0.49902
+   - task:
+       name: Translation hun-eng
+       type: translation
+       args: hun-eng
+     dataset:
+       name: newstest2009
+       type: wmt-2009-news
+       args: hun-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 22.3
+     - name: chr-F
+       type: chrf
+       value: 0.50950
+   - task:
+       name: Translation hun-fra
+       type: translation
+       args: hun-fra
+     dataset:
+       name: newstest2009
+       type: wmt-2009-news
+       args: hun-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.6
+     - name: chr-F
+       type: chrf
+       value: 0.50742
+   - task:
+       name: Translation hun-spa
+       type: translation
+       args: hun-spa
+     dataset:
+       name: newstest2009
+       type: wmt-2009-news
+       args: hun-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 22.2
+     - name: chr-F
+       type: chrf
+       value: 0.50788
+   - task:
+       name: Translation fin-eng
+       type: translation
+       args: fin-eng
+     dataset:
+       name: newstest2015
+       type: wmt-2015-news
+       args: fin-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 27.0
+     - name: chr-F
+       type: chrf
+       value: 0.55249
+   - task:
+       name: Translation fin-eng
+       type: translation
+       args: fin-eng
+     dataset:
+       name: newstest2016
+       type: wmt-2016-news
+       args: fin-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 30.7
+     - name: chr-F
+       type: chrf
+       value: 0.57961
+   - task:
+       name: Translation fin-eng
+       type: translation
+       args: fin-eng
+     dataset:
+       name: newstest2017
+       type: wmt-2017-news
+       args: fin-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 33.2
+     - name: chr-F
+       type: chrf
+       value: 0.59973
+   - task:
+       name: Translation est-eng
+       type: translation
+       args: est-eng
+     dataset:
+       name: newstest2018
+       type: wmt-2018-news
+       args: est-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 31.5
+     - name: chr-F
+       type: chrf
+       value: 0.59190
+   - task:
+       name: Translation fin-eng
+       type: translation
+       args: fin-eng
+     dataset:
+       name: newstest2018
+       type: wmt-2018-news
+       args: fin-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 24.4
+     - name: chr-F
+       type: chrf
+       value: 0.52373
+   - task:
+       name: Translation fin-eng
+       type: translation
+       args: fin-eng
+     dataset:
+       name: newstest2019
+       type: wmt-2019-news
+       args: fin-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 30.3
+     - name: chr-F
+       type: chrf
+       value: 0.57079
+ ---
+ # opus-mt-tc-bible-big-fiu-deu_eng_fra_por_spa
+
+ ## Table of Contents
+ - [Model Details](#model-details)
+ - [Uses](#uses)
+ - [Risks, Limitations and Biases](#risks-limitations-and-biases)
+ - [How to Get Started With the Model](#how-to-get-started-with-the-model)
+ - [Training](#training)
+ - [Evaluation](#evaluation)
+ - [Citation Information](#citation-information)
+ - [Acknowledgements](#acknowledgements)
+
+ ## Model Details
+
+ Neural machine translation model for translating from Finno-Ugrian languages (fiu) to German, English, French, Portuguese and Spanish (deu+eng+fra+por+spa).
+
+ This model is part of the [OPUS-MT project](https://github.com/Helsinki-NLP/Opus-MT), an effort to make neural machine translation models widely available and accessible for many languages in the world. All models are originally trained with [Marian NMT](https://marian-nmt.github.io/), an efficient NMT implementation written in pure C++, and then converted to PyTorch using the Hugging Face transformers library. Training data is taken from [OPUS](https://opus.nlpl.eu/) and training pipelines use the procedures of [OPUS-MT-train](https://github.com/Helsinki-NLP/OPUS-MT-train).
+
+ **Model Description:**
+ - **Developed by:** Language Technology Research Group at the University of Helsinki
+ - **Model Type:** Translation (transformer-big)
+ - **Release:** 2024-05-30
+ - **License:** Apache-2.0
+ - **Language(s):**
+   - Source Language(s): chm est fin fkv hun izh koi kom kpv krl liv mdf mrj myv sma sme smn udm vep vot vro
+   - Target Language(s): deu eng fra por spa
+   - Valid Target Language Labels: >>deu<< >>eng<< >>fra<< >>por<< >>spa<< >>xxx<<
+ - **Original Model:** [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/fiu-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
+ - **Resources for more information:**
+   - [OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/fiu-deu%2Beng%2Bfra%2Bpor%2Bspa/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
+   - [OPUS-MT-train GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
+   - [More information about MarianNMT models in the transformers library](https://huggingface.co/docs/transformers/model_doc/marian)
+   - [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge/)
+   - [HPLT bilingual data v1 (as part of the Tatoeba Translation Challenge dataset)](https://hplt-project.org/datasets/v1)
+   - [A massively parallel Bible corpus](https://aclanthology.org/L14-1215/)
+
+ This is a multilingual translation model with multiple target languages. A sentence-initial language token is required in the form of `>>id<<` (id = valid target language ID), e.g. `>>deu<<`.
+
+ ## Uses
+
+ This model can be used for translation and text-to-text generation.
+
+ ## Risks, Limitations and Biases
+
+ **CONTENT WARNING: Readers should be aware that the model is trained on various public data sets that may contain content that is disturbing, offensive, and can propagate historical and current stereotypes.**
+
+ Significant research has explored bias and fairness issues with language models (see, e.g., [Sheng et al. (2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)).
+
+ ## How to Get Started With the Model
+
+ A short code example:
+
+ ```python
+ from transformers import MarianMTModel, MarianTokenizer
+
+ src_text = [
+     ">>deu<< Replace this with text in an accepted source language.",
+     ">>spa<< This is the second sentence."
+ ]
+
+ model_name = "Helsinki-NLP/opus-mt-tc-bible-big-fiu-deu_eng_fra_por_spa"
+ tokenizer = MarianTokenizer.from_pretrained(model_name)
+ model = MarianMTModel.from_pretrained(model_name)
+ translated = model.generate(**tokenizer(src_text, return_tensors="pt", padding=True))
+
+ for t in translated:
+     print(tokenizer.decode(t, skip_special_tokens=True))
+ ```
+
+ You can also use OPUS-MT models with the transformers pipeline API, for example:
+
+ ```python
+ from transformers import pipeline
+ pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-bible-big-fiu-deu_eng_fra_por_spa")
+ print(pipe(">>deu<< Replace this with text in an accepted source language."))
+ ```
+
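+ Because this is a multi-target model, the same pipeline can produce any of the five target languages; only the sentence-initial token changes. A minimal sketch of this (the Finnish example sentence is our own, and outputs will vary):
+
+ ```python
+ from transformers import pipeline
+
+ pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-bible-big-fiu-deu_eng_fra_por_spa")
+
+ src = "Tämä on esimerkkilause."  # hypothetical Finnish input ("This is an example sentence.")
+ for tgt in ["deu", "eng", "fra", "por", "spa"]:
+     # The >>id<< prefix selects the target language.
+     print(tgt, pipe(f">>{tgt}<< {src}")[0]["translation_text"])
+ ```
+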
+ ## Training
+
+ - **Data:** opusTCv20230926max50+bt+jhubc ([source](https://github.com/Helsinki-NLP/Tatoeba-Challenge))
+ - **Pre-processing:** SentencePiece (spm32k,spm32k)
+ - **Model Type:** transformer-big
+ - **Original MarianNMT Model:** [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/fiu-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
+ - **Training Scripts:** [GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
+
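+ The `source.spm` and `target.spm` files in this repository hold the SentencePiece models used for that pre-processing. `MarianTokenizer` loads them automatically, but they can also be inspected directly; a minimal sketch, assuming `source.spm` has been downloaded from this repository to the working directory:
+
+ ```python
+ import sentencepiece as spm
+
+ # Load the source-side SentencePiece model (path is an assumption;
+ # adjust to wherever source.spm was saved).
+ sp = spm.SentencePieceProcessor(model_file="source.spm")
+
+ # Show the subword pieces; the tokenizer additionally maps pieces to vocabulary IDs
+ # and handles the >>id<< target-language token.
+ print(sp.encode("Tämä on esimerkkilause.", out_type=str))
+ ```
+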
+ ## Evaluation
+
+ * [Model scores at the OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/fiu-deu%2Beng%2Bfra%2Bpor%2Bspa/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
+ * test set translations: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/fiu-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt)
+ * test set scores: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/fiu-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt)
+ * benchmark results: [benchmark_results.txt](benchmark_results.txt)
+ * benchmark output: [benchmark_translations.zip](benchmark_translations.zip)
+
+ | langpair | testset | chr-F | BLEU | #sent | #words |
+ |----------|---------|-------|-------|-------|--------|
+ | est-deu | tatoeba-test-v2021-08-07 | 0.69451 | 53.9 | 244 | 1611 |
+ | est-eng | tatoeba-test-v2021-08-07 | 0.72437 | 58.2 | 1359 | 8811 |
+ | fin-deu | tatoeba-test-v2021-08-07 | 0.66025 | 47.3 | 2647 | 19163 |
+ | fin-eng | tatoeba-test-v2021-08-07 | 0.69685 | 53.7 | 10690 | 80552 |
+ | fin-fra | tatoeba-test-v2021-08-07 | 0.65900 | 48.3 | 1920 | 12193 |
+ | fin-por | tatoeba-test-v2021-08-07 | 0.72250 | 54.0 | 477 | 3021 |
+ | fin-spa | tatoeba-test-v2021-08-07 | 0.69600 | 52.1 | 2513 | 16912 |
+ | hun-deu | tatoeba-test-v2021-08-07 | 0.62418 | 41.1 | 15342 | 127344 |
+ | hun-eng | tatoeba-test-v2021-08-07 | 0.65626 | 48.7 | 13037 | 94699 |
+ | hun-fra | tatoeba-test-v2021-08-07 | 0.66840 | 50.3 | 2494 | 16914 |
+ | hun-por | tatoeba-test-v2021-08-07 | 0.65281 | 43.1 | 2500 | 16563 |
+ | hun-spa | tatoeba-test-v2021-08-07 | 0.67467 | 48.7 | 2500 | 16670 |
+ | est-deu | flores101-devtest | 0.55353 | 25.7 | 1012 | 25094 |
+ | est-eng | flores101-devtest | 0.61930 | 34.7 | 1012 | 24721 |
+ | est-fra | flores101-devtest | 0.58199 | 31.3 | 1012 | 28343 |
+ | est-por | flores101-devtest | 0.54388 | 26.5 | 1012 | 26519 |
+ | fin-eng | flores101-devtest | 0.59914 | 32.2 | 1012 | 24721 |
+ | fin-por | flores101-devtest | 0.55156 | 27.1 | 1012 | 26519 |
+ | hun-eng | flores101-devtest | 0.61198 | 33.5 | 1012 | 24721 |
+ | hun-fra | flores101-devtest | 0.57776 | 30.8 | 1012 | 28343 |
+ | hun-por | flores101-devtest | 0.56263 | 28.4 | 1012 | 26519 |
+ | hun-spa | flores101-devtest | 0.49140 | 20.7 | 1012 | 29199 |
+ | est-deu | flores200-devtest | 0.55825 | 26.3 | 1012 | 25094 |
+ | est-eng | flores200-devtest | 0.62404 | 35.4 | 1012 | 24721 |
+ | est-fra | flores200-devtest | 0.58580 | 31.7 | 1012 | 28343 |
+ | est-por | flores200-devtest | 0.55070 | 27.3 | 1012 | 26519 |
+ | est-spa | flores200-devtest | 0.50188 | 21.5 | 1012 | 29199 |
+ | fin-deu | flores200-devtest | 0.54281 | 24.0 | 1012 | 25094 |
+ | fin-eng | flores200-devtest | 0.60642 | 33.1 | 1012 | 24721 |
+ | fin-fra | flores200-devtest | 0.57540 | 30.5 | 1012 | 28343 |
+ | fin-por | flores200-devtest | 0.55497 | 27.4 | 1012 | 26519 |
+ | fin-spa | flores200-devtest | 0.49847 | 21.4 | 1012 | 29199 |
+ | hun-deu | flores200-devtest | 0.55180 | 25.1 | 1012 | 25094 |
+ | hun-eng | flores200-devtest | 0.61466 | 34.0 | 1012 | 24721 |
+ | hun-fra | flores200-devtest | 0.57670 | 30.6 | 1012 | 28343 |
+ | hun-por | flores200-devtest | 0.56510 | 28.9 | 1012 | 26519 |
+ | hun-spa | flores200-devtest | 0.49681 | 21.3 | 1012 | 29199 |
+ | hun-deu | newssyscomb2009 | 0.49819 | 17.9 | 502 | 11271 |
+ | hun-eng | newssyscomb2009 | 0.52063 | 24.4 | 502 | 11818 |
+ | hun-fra | newssyscomb2009 | 0.51589 | 22.0 | 502 | 12331 |
+ | hun-spa | newssyscomb2009 | 0.51508 | 22.7 | 502 | 12503 |
+ | hun-deu | newstest2008 | 0.50164 | 19.0 | 2051 | 47447 |
+ | hun-eng | newstest2008 | 0.49802 | 20.4 | 2051 | 49380 |
+ | hun-fra | newstest2008 | 0.51012 | 21.6 | 2051 | 52685 |
+ | hun-spa | newstest2008 | 0.50719 | 22.3 | 2051 | 52586 |
+ | hun-deu | newstest2009 | 0.49902 | 18.6 | 2525 | 62816 |
+ | hun-eng | newstest2009 | 0.50950 | 22.3 | 2525 | 65399 |
+ | hun-fra | newstest2009 | 0.50742 | 21.6 | 2525 | 69263 |
+ | hun-spa | newstest2009 | 0.50788 | 22.2 | 2525 | 68111 |
+ | fin-eng | newstest2015 | 0.55249 | 27.0 | 1370 | 27270 |
+ | fin-eng | newstest2016 | 0.57961 | 30.7 | 3000 | 62945 |
+ | fin-eng | newstest2017 | 0.59973 | 33.2 | 3002 | 61846 |
+ | est-eng | newstest2018 | 0.59190 | 31.5 | 2000 | 45405 |
+ | fin-eng | newstest2018 | 0.52373 | 24.4 | 3000 | 62325 |
+ | fin-eng | newstest2019 | 0.57079 | 30.3 | 1996 | 36215 |
+ | fin-eng | newstestB2017 | 0.56420 | 28.9 | 3002 | 61846 |
+ | est-deu | ntrex128 | 0.51377 | 21.4 | 1997 | 48761 |
+ | est-eng | ntrex128 | 0.58358 | 29.9 | 1997 | 47673 |
+ | est-fra | ntrex128 | 0.52713 | 24.9 | 1997 | 53481 |
+ | est-por | ntrex128 | 0.50745 | 22.2 | 1997 | 51631 |
+ | est-spa | ntrex128 | 0.54304 | 27.5 | 1997 | 54107 |
+ | fin-deu | ntrex128 | 0.50282 | 19.8 | 1997 | 48761 |
+ | fin-eng | ntrex128 | 0.55545 | 26.3 | 1997 | 47673 |
+ | fin-fra | ntrex128 | 0.50946 | 22.9 | 1997 | 53481 |
+ | fin-por | ntrex128 | 0.50404 | 21.3 | 1997 | 51631 |
+ | fin-spa | ntrex128 | 0.52641 | 25.5 | 1997 | 54107 |
+ | hun-deu | ntrex128 | 0.49322 | 18.5 | 1997 | 48761 |
+ | hun-eng | ntrex128 | 0.52964 | 23.3 | 1997 | 47673 |
+ | hun-fra | ntrex128 | 0.49800 | 21.8 | 1997 | 53481 |
+ | hun-por | ntrex128 | 0.48941 | 20.5 | 1997 | 51631 |
+ | hun-spa | ntrex128 | 0.51123 | 24.2 | 1997 | 54107 |
+
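+ The chr-F and BLEU numbers above come from the OPUS-MT evaluation pipeline (see the linked test-set translations and scores). Comparable scores can be recomputed with [sacreBLEU](https://github.com/mjpost/sacrebleu); a minimal sketch, assuming `hyps` and `refs` are lists of detokenized hypothesis and reference sentences (the placeholders below are ours, and the exact settings of the published runs may differ):
+
+ ```python
+ import sacrebleu
+
+ hyps = ["Das ist ein Test."]  # system output (placeholder)
+ refs = ["Das ist ein Test."]  # reference translation (placeholder)
+
+ bleu = sacrebleu.corpus_bleu(hyps, [refs])  # corpus-level BLEU
+ chrf = sacrebleu.corpus_chrf(hyps, [refs])  # corpus-level chrF
+
+ # Note: sacreBLEU reports chrF on a 0-100 scale; the table above uses 0-1.
+ print(f"BLEU = {bleu.score:.1f}, chr-F = {chrf.score / 100:.5f}")
+ ```
+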
+ ## Citation Information
+
+ * Publications: [Democratizing neural machine translation with OPUS-MT](https://doi.org/10.1007/s10579-023-09704-w), [OPUS-MT – Building open translation services for the World](https://aclanthology.org/2020.eamt-1.61/), and [The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual MT](https://aclanthology.org/2020.wmt-1.139/) (please cite if you use this model)
+
+ ```bibtex
+ @article{tiedemann2023democratizing,
+   title={Democratizing neural machine translation with {OPUS-MT}},
+   author={Tiedemann, J{\"o}rg and Aulamo, Mikko and Bakshandaeva, Daria and Boggia, Michele and Gr{\"o}nroos, Stig-Arne and Nieminen, Tommi and Raganato, Alessandro and Scherrer, Yves and Vazquez, Raul and Virpioja, Sami},
+   journal={Language Resources and Evaluation},
+   number={58},
+   pages={713--755},
+   year={2023},
+   publisher={Springer Nature},
+   issn={1574-0218},
+   doi={10.1007/s10579-023-09704-w}
+ }
+
+ @inproceedings{tiedemann-thottingal-2020-opus,
+   title = "{OPUS}-{MT} {--} Building open translation services for the World",
+   author = {Tiedemann, J{\"o}rg and Thottingal, Santhosh},
+   booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
+   month = nov,
+   year = "2020",
+   address = "Lisboa, Portugal",
+   publisher = "European Association for Machine Translation",
+   url = "https://aclanthology.org/2020.eamt-1.61",
+   pages = "479--480",
+ }
+
+ @inproceedings{tiedemann-2020-tatoeba,
+   title = "The Tatoeba Translation Challenge {--} Realistic Data Sets for Low Resource and Multilingual {MT}",
+   author = {Tiedemann, J{\"o}rg},
+   booktitle = "Proceedings of the Fifth Conference on Machine Translation",
+   month = nov,
+   year = "2020",
+   address = "Online",
+   publisher = "Association for Computational Linguistics",
+   url = "https://aclanthology.org/2020.wmt-1.139",
+   pages = "1174--1182",
+ }
+ ```
+
+ ## Acknowledgements
+
+ The work is supported by the [HPLT project](https://hplt-project.org/), funded by the European Union’s Horizon Europe research and innovation programme under grant agreement No 101070350. We are also grateful for the generous computational resources and IT infrastructure provided by [CSC -- IT Center for Science](https://www.csc.fi/), Finland, and the [EuroHPC supercomputer LUMI](https://www.lumi-supercomputer.eu/).
+
+ ## Model conversion info
+
+ * transformers version: 4.45.1
+ * OPUS-MT git hash: 0882077
+ * port time: Tue Oct 8 10:53:49 EEST 2024
+ * port machine: LM0-400-22516.local
benchmark_results.txt ADDED
@@ -0,0 +1,85 @@
+ multi-multi tatoeba-test-v2020-07-28-v2023-09-26 0.63895 44.6 10000 77412
+ est-deu flores101-devtest 0.55353 25.7 1012 25094
+ est-eng flores101-devtest 0.61930 34.7 1012 24721
+ est-fra flores101-devtest 0.58199 31.3 1012 28343
+ est-por flores101-devtest 0.54388 26.5 1012 26519
+ fin-eng flores101-devtest 0.59914 32.2 1012 24721
+ fin-por flores101-devtest 0.55156 27.1 1012 26519
+ hun-eng flores101-devtest 0.61198 33.5 1012 24721
+ hun-fra flores101-devtest 0.57776 30.8 1012 28343
+ hun-por flores101-devtest 0.56263 28.4 1012 26519
+ hun-spa flores101-devtest 0.49140 20.7 1012 29199
+ est-deu flores200-devtest 0.55825 26.3 1012 25094
+ est-eng flores200-devtest 0.62404 35.4 1012 24721
+ est-fra flores200-devtest 0.58580 31.7 1012 28343
+ est-por flores200-devtest 0.55070 27.3 1012 26519
+ est-spa flores200-devtest 0.50188 21.5 1012 29199
+ fin-deu flores200-devtest 0.54281 24.0 1012 25094
+ fin-eng flores200-devtest 0.60642 33.1 1012 24721
+ fin-fra flores200-devtest 0.57540 30.5 1012 28343
+ fin-por flores200-devtest 0.55497 27.4 1012 26519
+ fin-spa flores200-devtest 0.49847 21.4 1012 29199
+ hun-deu flores200-devtest 0.55180 25.1 1012 25094
+ hun-eng flores200-devtest 0.61466 34.0 1012 24721
+ hun-fra flores200-devtest 0.57670 30.6 1012 28343
+ hun-por flores200-devtest 0.56510 28.9 1012 26519
+ hun-spa flores200-devtest 0.49681 21.3 1012 29199
+ hun-deu newssyscomb2009 0.49819 17.9 502 11271
+ hun-eng newssyscomb2009 0.52063 24.4 502 11818
+ hun-fra newssyscomb2009 0.51589 22.0 502 12331
+ hun-spa newssyscomb2009 0.51508 22.7 502 12503
+ hun-deu newstest2008 0.50164 19.0 2051 47447
+ hun-eng newstest2008 0.49802 20.4 2051 49380
+ hun-fra newstest2008 0.51012 21.6 2051 52685
+ hun-spa newstest2008 0.50719 22.3 2051 52586
+ hun-deu newstest2009 0.49902 18.6 2525 62816
+ hun-eng newstest2009 0.50950 22.3 2525 65399
+ hun-fra newstest2009 0.50742 21.6 2525 69263
+ hun-spa newstest2009 0.50788 22.2 2525 68111
+ fin-eng newstest2015 0.55249 27.0 1370 27270
+ fin-eng newstest2016 0.57961 30.7 3000 62945
+ fin-eng newstest2017 0.59973 33.2 3002 61846
+ est-eng newstest2018 0.59190 31.5 2000 45405
+ fin-eng newstest2018 0.52373 24.4 3000 62325
+ fin-eng newstest2019 0.57079 30.3 1996 36215
+ fin-eng newstestB2017 0.56420 28.9 3002 61846
+ est-deu ntrex128 0.51377 21.4 1997 48761
+ est-eng ntrex128 0.58358 29.9 1997 47673
+ est-fra ntrex128 0.52713 24.9 1997 53481
+ est-por ntrex128 0.50745 22.2 1997 51631
+ est-spa ntrex128 0.54304 27.5 1997 54107
+ fin-deu ntrex128 0.50282 19.8 1997 48761
+ fin-eng ntrex128 0.55545 26.3 1997 47673
+ fin-fra ntrex128 0.50946 22.9 1997 53481
+ fin-por ntrex128 0.50404 21.3 1997 51631
+ fin-spa ntrex128 0.52641 25.5 1997 54107
+ hun-deu ntrex128 0.49322 18.5 1997 48761
+ hun-eng ntrex128 0.52964 23.3 1997 47673
+ hun-fra ntrex128 0.49800 21.8 1997 53481
+ hun-por ntrex128 0.48941 20.5 1997 51631
+ hun-spa ntrex128 0.51123 24.2 1997 54107
+ est-deu tatoeba-test-v2020-07-28 0.67936 51.8 217 1390
+ fin-eng tatoeba-test-v2020-07-28 0.69200 53.1 10000 74651
+ fin-fra tatoeba-test-v2020-07-28 0.65899 48.2 1930 12229
+ fin-spa tatoeba-test-v2020-07-28 0.69327 51.4 2500 16828
+ hun-deu tatoeba-test-v2020-07-28 0.62890 41.9 10000 81699
+ hun-eng tatoeba-test-v2020-07-28 0.67153 51.1 10000 69326
+ hun-fra tatoeba-test-v2020-07-28 0.66663 49.9 2500 16940
+ fin-deu tatoeba-test-v2021-03-30 0.65542 46.9 4984 36070
+ fin-eng tatoeba-test-v2021-03-30 0.69200 53.1 10186 76206
+ fin-spa tatoeba-test-v2021-03-30 0.69377 51.5 4999 33655
+ hun-deu tatoeba-test-v2021-03-30 0.62256 40.9 12232 101962
+ hun-eng tatoeba-test-v2021-03-30 0.65658 49.0 11904 85120
+ hun-fra tatoeba-test-v2021-03-30 0.66663 49.9 2500 16940
+ est-deu tatoeba-test-v2021-08-07 0.69451 53.9 244 1611
+ est-eng tatoeba-test-v2021-08-07 0.72437 58.2 1359 8811
+ fin-deu tatoeba-test-v2021-08-07 0.66025 47.3 2647 19163
+ fin-eng tatoeba-test-v2021-08-07 0.69685 53.7 10690 80552
+ fin-fra tatoeba-test-v2021-08-07 0.65900 48.3 1920 12193
+ fin-por tatoeba-test-v2021-08-07 0.72250 54.0 477 3021
+ fin-spa tatoeba-test-v2021-08-07 0.69600 52.1 2513 16912
+ hun-deu tatoeba-test-v2021-08-07 0.62418 41.1 15342 127344
+ hun-eng tatoeba-test-v2021-08-07 0.65626 48.7 13037 94699
+ hun-fra tatoeba-test-v2021-08-07 0.66840 50.3 2494 16914
+ hun-por tatoeba-test-v2021-08-07 0.65281 43.1 2500 16563
+ hun-spa tatoeba-test-v2021-08-07 0.67467 48.7 2500 16670
benchmark_translations.zip ADDED
File without changes
config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "pytorch-models/opus-mt-tc-bible-big-fiu-deu_eng_fra_por_spa",
+   "activation_dropout": 0.0,
+   "activation_function": "relu",
+   "architectures": [
+     "MarianMTModel"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "classifier_dropout": 0.0,
+   "d_model": 1024,
+   "decoder_attention_heads": 16,
+   "decoder_ffn_dim": 4096,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 6,
+   "decoder_start_token_id": 59381,
+   "decoder_vocab_size": 59382,
+   "dropout": 0.1,
+   "encoder_attention_heads": 16,
+   "encoder_ffn_dim": 4096,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 6,
+   "eos_token_id": 618,
+   "forced_eos_token_id": null,
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "max_length": null,
+   "max_position_embeddings": 1024,
+   "model_type": "marian",
+   "normalize_embedding": false,
+   "num_beams": null,
+   "num_hidden_layers": 6,
+   "pad_token_id": 59381,
+   "scale_embedding": true,
+   "share_encoder_decoder_embeddings": true,
+   "static_position_embeddings": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.45.1",
+   "use_cache": true,
+   "vocab_size": 59382
+ }
generation_config.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "_from_model_config": true,
+   "bad_words_ids": [
+     [
+       59381
+     ]
+   ],
+   "bos_token_id": 0,
+   "decoder_start_token_id": 59381,
+   "eos_token_id": 618,
+   "forced_eos_token_id": 618,
+   "max_length": 512,
+   "num_beams": 4,
+   "pad_token_id": 59381,
+   "transformers_version": "4.45.1"
+ }
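
These generation defaults (beam search with `num_beams=4`, `max_length=512`, and the pad token excluded via `bad_words_ids`) are applied automatically by `model.generate`. A minimal sketch of overriding them per call (the override values here are illustrative, not recommendations):

```python
from transformers import MarianMTModel, MarianTokenizer

model_name = "Helsinki-NLP/opus-mt-tc-bible-big-fiu-deu_eng_fra_por_spa"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

batch = tokenizer([">>eng<< Tämä on testi."], return_tensors="pt", padding=True)
# Per-call kwargs take precedence over generation_config.json defaults.
out = model.generate(**batch, num_beams=5, max_length=128)
print(tokenizer.batch_decode(out, skip_special_tokens=True))
```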
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b23ffb5221b498df0eaab1c7046ed101a43b7e97966229500e39681ac5c38617
+ size 948925320
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cffc6805a38de06c3222152598bb2532ef9064fc5a8be3f2e4f4ef57d1dde00d
+ size 948976581
source.spm ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4a365d3ea133001d011349c9b6b9a6afb40ff7653f94459588498632e10430b2
+ size 822563
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
target.spm ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:de58b541da4a4e79c0f5fd576ad934f66a33c36161f00499cd852eb122ab8ac3
+ size 811921
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"source_lang": "fiu", "target_lang": "deu+eng+fra+por+spa", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "sp_model_kwargs": {}, "separate_vocabs": false, "special_tokens_map_file": null, "name_or_path": "marian-models/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30/fiu-deu+eng+fra+por+spa", "tokenizer_class": "MarianTokenizer"}
vocab.json ADDED
The diff for this file is too large to render.