tiedeman committed
Commit fbdbc7e
1 Parent(s): 8a38daa

Initial commit

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.spm filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,1506 @@
---
library_name: transformers
language:
- anp
- as
- awa
- bho
- bn
- bpy
- de
- dv
- en
- es
- fr
- gbm
- gu
- hi
- hif
- hne
- hns
- kok
- ks
- lah
- mag
- mai
- mr
- ne
- or
- pa
- pi
- pt
- rhg
- rmy
- rom
- sa
- sd
- si
- skr
- syl
- ur

tags:
- translation
- opus-mt-tc-bible

license: apache-2.0
model-index:
- name: opus-mt-tc-bible-big-deu_eng_fra_por_spa-inc
  results:
  - task:
      name: Translation deu-ben
      type: translation
      args: deu-ben
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: deu-ben
    metrics:
    - name: BLEU
      type: bleu
      value: 11.3
    - name: chr-F
      type: chrf
      value: 0.44696
  - task:
      name: Translation deu-guj
      type: translation
      args: deu-guj
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: deu-guj
    metrics:
    - name: BLEU
      type: bleu
      value: 12.0
    - name: chr-F
      type: chrf
      value: 0.40939
  - task:
      name: Translation deu-hin
      type: translation
      args: deu-hin
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: deu-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 22.7
    - name: chr-F
      type: chrf
      value: 0.48864
  - task:
      name: Translation deu-hne
      type: translation
      args: deu-hne
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: deu-hne
    metrics:
    - name: BLEU
      type: bleu
      value: 14.2
    - name: chr-F
      type: chrf
      value: 0.43166
  - task:
      name: Translation deu-mag
      type: translation
      args: deu-mag
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: deu-mag
    metrics:
    - name: BLEU
      type: bleu
      value: 14.2
    - name: chr-F
      type: chrf
      value: 0.43058
  - task:
      name: Translation deu-pan
      type: translation
      args: deu-pan
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: deu-pan
    metrics:
    - name: BLEU
      type: bleu
      value: 10.5
    - name: chr-F
      type: chrf
      value: 0.36795
  - task:
      name: Translation deu-urd
      type: translation
      args: deu-urd
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: deu-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 14.3
    - name: chr-F
      type: chrf
      value: 0.41167
  - task:
      name: Translation eng-ben
      type: translation
      args: eng-ben
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: eng-ben
    metrics:
    - name: BLEU
      type: bleu
      value: 17.7
    - name: chr-F
      type: chrf
      value: 0.52088
  - task:
      name: Translation eng-bho
      type: translation
      args: eng-bho
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: eng-bho
    metrics:
    - name: BLEU
      type: bleu
      value: 11.6
    - name: chr-F
      type: chrf
      value: 0.37333
  - task:
      name: Translation eng-guj
      type: translation
      args: eng-guj
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: eng-guj
    metrics:
    - name: BLEU
      type: bleu
      value: 23.2
    - name: chr-F
      type: chrf
      value: 0.54758
  - task:
      name: Translation eng-hin
      type: translation
      args: eng-hin
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: eng-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 34.4
    - name: chr-F
      type: chrf
      value: 0.58825
  - task:
      name: Translation eng-hne
      type: translation
      args: eng-hne
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: eng-hne
    metrics:
    - name: BLEU
      type: bleu
      value: 19.1
    - name: chr-F
      type: chrf
      value: 0.46144
  - task:
      name: Translation eng-mag
      type: translation
      args: eng-mag
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: eng-mag
    metrics:
    - name: BLEU
      type: bleu
      value: 21.9
    - name: chr-F
      type: chrf
      value: 0.50291
  - task:
      name: Translation eng-mar
      type: translation
      args: eng-mar
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: eng-mar
    metrics:
    - name: BLEU
      type: bleu
      value: 15.6
    - name: chr-F
      type: chrf
      value: 0.49344
  - task:
      name: Translation eng-pan
      type: translation
      args: eng-pan
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: eng-pan
    metrics:
    - name: BLEU
      type: bleu
      value: 18.4
    - name: chr-F
      type: chrf
      value: 0.45635
  - task:
      name: Translation eng-sin
      type: translation
      args: eng-sin
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: eng-sin
    metrics:
    - name: BLEU
      type: bleu
      value: 11.8
    - name: chr-F
      type: chrf
      value: 0.45683
  - task:
      name: Translation eng-urd
      type: translation
      args: eng-urd
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: eng-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 20.6
    - name: chr-F
      type: chrf
      value: 0.48224
  - task:
      name: Translation fra-ben
      type: translation
      args: fra-ben
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: fra-ben
    metrics:
    - name: BLEU
      type: bleu
      value: 11.1
    - name: chr-F
      type: chrf
      value: 0.44486
  - task:
      name: Translation fra-guj
      type: translation
      args: fra-guj
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: fra-guj
    metrics:
    - name: BLEU
      type: bleu
      value: 12.2
    - name: chr-F
      type: chrf
      value: 0.41021
  - task:
      name: Translation fra-hin
      type: translation
      args: fra-hin
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: fra-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 22.7
    - name: chr-F
      type: chrf
      value: 0.48632
  - task:
      name: Translation fra-hne
      type: translation
      args: fra-hne
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: fra-hne
    metrics:
    - name: BLEU
      type: bleu
      value: 13.8
    - name: chr-F
      type: chrf
      value: 0.42777
  - task:
      name: Translation fra-mag
      type: translation
      args: fra-mag
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: fra-mag
    metrics:
    - name: BLEU
      type: bleu
      value: 14.3
    - name: chr-F
      type: chrf
      value: 0.42725
  - task:
      name: Translation fra-pan
      type: translation
      args: fra-pan
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: fra-pan
    metrics:
    - name: BLEU
      type: bleu
      value: 10.6
    - name: chr-F
      type: chrf
      value: 0.36902
  - task:
      name: Translation fra-urd
      type: translation
      args: fra-urd
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: fra-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 13.6
    - name: chr-F
      type: chrf
      value: 0.40901
  - task:
      name: Translation por-ben
      type: translation
      args: por-ben
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: por-ben
    metrics:
    - name: BLEU
      type: bleu
      value: 10.7
    - name: chr-F
      type: chrf
      value: 0.43877
  - task:
      name: Translation por-guj
      type: translation
      args: por-guj
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: por-guj
    metrics:
    - name: BLEU
      type: bleu
      value: 10.9
    - name: chr-F
      type: chrf
      value: 0.38225
  - task:
      name: Translation por-hin
      type: translation
      args: por-hin
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: por-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 23.9
    - name: chr-F
      type: chrf
      value: 0.50121
  - task:
      name: Translation por-hne
      type: translation
      args: por-hne
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: por-hne
    metrics:
    - name: BLEU
      type: bleu
      value: 14.1
    - name: chr-F
      type: chrf
      value: 0.42270
  - task:
      name: Translation por-mag
      type: translation
      args: por-mag
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: por-mag
    metrics:
    - name: BLEU
      type: bleu
      value: 13.7
    - name: chr-F
      type: chrf
      value: 0.42146
  - task:
      name: Translation por-urd
      type: translation
      args: por-urd
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: por-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 14.5
    - name: chr-F
      type: chrf
      value: 0.41225
  - task:
      name: Translation spa-hin
      type: translation
      args: spa-hin
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: spa-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 16.4
    - name: chr-F
      type: chrf
      value: 0.43977
  - task:
      name: Translation spa-hne
      type: translation
      args: spa-hne
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: spa-hne
    metrics:
    - name: BLEU
      type: bleu
      value: 10.8
    - name: chr-F
      type: chrf
      value: 0.39555
  - task:
      name: Translation spa-mag
      type: translation
      args: spa-mag
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: spa-mag
    metrics:
    - name: BLEU
      type: bleu
      value: 11.1
    - name: chr-F
      type: chrf
      value: 0.39621
  - task:
      name: Translation spa-urd
      type: translation
      args: spa-urd
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: spa-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 10.8
    - name: chr-F
      type: chrf
      value: 0.37993
  - task:
      name: Translation deu-ben
      type: translation
      args: deu-ben
    dataset:
      name: flores101-devtest
      type: flores_101
      args: deu ben devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 10.8
    - name: chr-F
      type: chrf
      value: 0.44269
  - task:
      name: Translation deu-hin
      type: translation
      args: deu-hin
    dataset:
      name: flores101-devtest
      type: flores_101
      args: deu hin devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 21.9
    - name: chr-F
      type: chrf
      value: 0.48314
  - task:
      name: Translation eng-ben
      type: translation
      args: eng-ben
    dataset:
      name: flores101-devtest
      type: flores_101
      args: eng ben devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 17.4
    - name: chr-F
      type: chrf
      value: 0.51768
  - task:
      name: Translation eng-guj
      type: translation
      args: eng-guj
    dataset:
      name: flores101-devtest
      type: flores_101
      args: eng guj devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 22.7
    - name: chr-F
      type: chrf
      value: 0.54325
  - task:
      name: Translation eng-hin
      type: translation
      args: eng-hin
    dataset:
      name: flores101-devtest
      type: flores_101
      args: eng hin devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 34.1
    - name: chr-F
      type: chrf
      value: 0.58472
  - task:
      name: Translation fra-ben
      type: translation
      args: fra-ben
    dataset:
      name: flores101-devtest
      type: flores_101
      args: fra ben devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 11.1
    - name: chr-F
      type: chrf
      value: 0.44304
  - task:
      name: Translation fra-hin
      type: translation
      args: fra-hin
    dataset:
      name: flores101-devtest
      type: flores_101
      args: fra hin devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 22.5
    - name: chr-F
      type: chrf
      value: 0.48245
  - task:
      name: Translation deu-hin
      type: translation
      args: deu-hin
    dataset:
      name: ntrex128
      type: ntrex128
      args: deu-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 17.0
    - name: chr-F
      type: chrf
      value: 0.43252
  - task:
      name: Translation deu-pan
      type: translation
      args: deu-pan
    dataset:
      name: ntrex128
      type: ntrex128
      args: deu-pan
    metrics:
    - name: BLEU
      type: bleu
      value: 10.2
    - name: chr-F
      type: chrf
      value: 0.36448
  - task:
      name: Translation deu-urd
      type: translation
      args: deu-urd
    dataset:
      name: ntrex128
      type: ntrex128
      args: deu-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 14.8
    - name: chr-F
      type: chrf
      value: 0.41844
  - task:
      name: Translation eng-ben
      type: translation
      args: eng-ben
    dataset:
      name: ntrex128
      type: ntrex128
      args: eng-ben
    metrics:
    - name: BLEU
      type: bleu
      value: 17.3
    - name: chr-F
      type: chrf
      value: 0.52381
  - task:
      name: Translation eng-guj
      type: translation
      args: eng-guj
    dataset:
      name: ntrex128
      type: ntrex128
      args: eng-guj
    metrics:
    - name: BLEU
      type: bleu
      value: 17.2
    - name: chr-F
      type: chrf
      value: 0.49386
  - task:
      name: Translation eng-hin
      type: translation
      args: eng-hin
    dataset:
      name: ntrex128
      type: ntrex128
      args: eng-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 27.4
    - name: chr-F
      type: chrf
      value: 0.52696
  - task:
      name: Translation eng-mar
      type: translation
      args: eng-mar
    dataset:
      name: ntrex128
      type: ntrex128
      args: eng-mar
    metrics:
    - name: BLEU
      type: bleu
      value: 10.8
    - name: chr-F
      type: chrf
      value: 0.45244
  - task:
      name: Translation eng-pan
      type: translation
      args: eng-pan
    dataset:
      name: ntrex128
      type: ntrex128
      args: eng-pan
    metrics:
    - name: BLEU
      type: bleu
      value: 19.5
    - name: chr-F
      type: chrf
      value: 0.46534
  - task:
      name: Translation eng-sin
      type: translation
      args: eng-sin
    dataset:
      name: ntrex128
      type: ntrex128
      args: eng-sin
    metrics:
    - name: BLEU
      type: bleu
      value: 10.5
    - name: chr-F
      type: chrf
      value: 0.44124
  - task:
      name: Translation eng-urd
      type: translation
      args: eng-urd
    dataset:
      name: ntrex128
      type: ntrex128
      args: eng-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 22.4
    - name: chr-F
      type: chrf
      value: 0.50060
  - task:
      name: Translation fra-hin
      type: translation
      args: fra-hin
    dataset:
      name: ntrex128
      type: ntrex128
      args: fra-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 17.4
    - name: chr-F
      type: chrf
      value: 0.42777
  - task:
      name: Translation fra-urd
      type: translation
      args: fra-urd
    dataset:
      name: ntrex128
      type: ntrex128
      args: fra-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 14.3
    - name: chr-F
      type: chrf
      value: 0.41229
  - task:
      name: Translation por-ben
      type: translation
      args: por-ben
    dataset:
      name: ntrex128
      type: ntrex128
      args: por-ben
    metrics:
    - name: BLEU
      type: bleu
      value: 10.1
    - name: chr-F
      type: chrf
      value: 0.44134
  - task:
      name: Translation por-hin
      type: translation
      args: por-hin
    dataset:
      name: ntrex128
      type: ntrex128
      args: por-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 17.7
    - name: chr-F
      type: chrf
      value: 0.43461
  - task:
      name: Translation por-urd
      type: translation
      args: por-urd
    dataset:
      name: ntrex128
      type: ntrex128
      args: por-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 14.5
    - name: chr-F
      type: chrf
      value: 0.41777
  - task:
      name: Translation spa-ben
      type: translation
      args: spa-ben
    dataset:
      name: ntrex128
      type: ntrex128
      args: spa-ben
    metrics:
    - name: BLEU
      type: bleu
      value: 10.6
    - name: chr-F
      type: chrf
      value: 0.45329
  - task:
      name: Translation spa-hin
      type: translation
      args: spa-hin
    dataset:
      name: ntrex128
      type: ntrex128
      args: spa-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 17.9
    - name: chr-F
      type: chrf
      value: 0.43747
  - task:
      name: Translation spa-pan
      type: translation
      args: spa-pan
    dataset:
      name: ntrex128
      type: ntrex128
      args: spa-pan
    metrics:
    - name: BLEU
      type: bleu
      value: 10.2
    - name: chr-F
      type: chrf
      value: 0.36716
  - task:
      name: Translation spa-urd
      type: translation
      args: spa-urd
    dataset:
      name: ntrex128
      type: ntrex128
      args: spa-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 14.6
    - name: chr-F
      type: chrf
      value: 0.41929
  - task:
      name: Translation eng-hin
      type: translation
      args: eng-hin
    dataset:
      name: tatoeba-test-v2021-08-07
      type: tatoeba_mt
      args: eng-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 28.1
    - name: chr-F
      type: chrf
      value: 0.52587
  - task:
      name: Translation eng-mar
      type: translation
      args: eng-mar
    dataset:
      name: tatoeba-test-v2021-08-07
      type: tatoeba_mt
      args: eng-mar
    metrics:
    - name: BLEU
      type: bleu
      value: 24.2
    - name: chr-F
      type: chrf
      value: 0.52516
  - task:
      name: Translation multi-multi
      type: translation
      args: multi-multi
    dataset:
      name: tatoeba-test-v2020-07-28-v2023-09-26
      type: tatoeba_mt
      args: multi-multi
    metrics:
    - name: BLEU
      type: bleu
      value: 21.9
    - name: chr-F
      type: chrf
      value: 0.49252
  - task:
      name: Translation eng-ben
      type: translation
      args: eng-ben
    dataset:
      name: tico19-test
      type: tico19-test
      args: eng-ben
    metrics:
    - name: BLEU
      type: bleu
      value: 18.6
    - name: chr-F
      type: chrf
      value: 0.51850
  - task:
      name: Translation eng-hin
      type: translation
      args: eng-hin
    dataset:
      name: tico19-test
      type: tico19-test
      args: eng-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 41.9
    - name: chr-F
      type: chrf
      value: 0.62999
  - task:
      name: Translation eng-mar
      type: translation
      args: eng-mar
    dataset:
      name: tico19-test
      type: tico19-test
      args: eng-mar
    metrics:
    - name: BLEU
      type: bleu
      value: 13.0
    - name: chr-F
      type: chrf
      value: 0.45968
  - task:
      name: Translation eng-nep
      type: translation
      args: eng-nep
    dataset:
      name: tico19-test
      type: tico19-test
      args: eng-nep
    metrics:
    - name: BLEU
      type: bleu
      value: 18.7
    - name: chr-F
      type: chrf
      value: 0.54373
  - task:
      name: Translation eng-urd
      type: translation
      args: eng-urd
    dataset:
      name: tico19-test
      type: tico19-test
      args: eng-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 21.7
    - name: chr-F
      type: chrf
      value: 0.50920
  - task:
      name: Translation fra-hin
      type: translation
      args: fra-hin
    dataset:
      name: tico19-test
      type: tico19-test
      args: fra-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 25.6
    - name: chr-F
      type: chrf
      value: 0.48666
  - task:
      name: Translation fra-nep
      type: translation
      args: fra-nep
    dataset:
      name: tico19-test
      type: tico19-test
      args: fra-nep
    metrics:
    - name: BLEU
      type: bleu
      value: 10.0
    - name: chr-F
      type: chrf
      value: 0.41414
  - task:
      name: Translation fra-urd
      type: translation
      args: fra-urd
    dataset:
      name: tico19-test
      type: tico19-test
      args: fra-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 13.4
    - name: chr-F
      type: chrf
      value: 0.39479
  - task:
      name: Translation por-ben
      type: translation
      args: por-ben
    dataset:
      name: tico19-test
      type: tico19-test
      args: por-ben
    metrics:
    - name: BLEU
      type: bleu
      value: 12.7
    - name: chr-F
      type: chrf
      value: 0.45609
  - task:
      name: Translation por-hin
      type: translation
      args: por-hin
    dataset:
      name: tico19-test
      type: tico19-test
      args: por-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 31.2
    - name: chr-F
      type: chrf
      value: 0.55530
  - task:
      name: Translation por-nep
      type: translation
      args: por-nep
    dataset:
      name: tico19-test
      type: tico19-test
      args: por-nep
    metrics:
    - name: BLEU
      type: bleu
      value: 12.4
    - name: chr-F
      type: chrf
      value: 0.47698
  - task:
      name: Translation por-urd
      type: translation
      args: por-urd
    dataset:
      name: tico19-test
      type: tico19-test
      args: por-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 15.6
    - name: chr-F
      type: chrf
      value: 0.44747
  - task:
      name: Translation spa-ben
      type: translation
      args: spa-ben
    dataset:
      name: tico19-test
      type: tico19-test
      args: spa-ben
    metrics:
    - name: BLEU
      type: bleu
      value: 13.3
    - name: chr-F
      type: chrf
      value: 0.46418
  - task:
      name: Translation spa-hin
      type: translation
      args: spa-hin
    dataset:
      name: tico19-test
      type: tico19-test
      args: spa-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 31.0
    - name: chr-F
      type: chrf
      value: 0.55526
  - task:
      name: Translation spa-mar
      type: translation
      args: spa-mar
    dataset:
      name: tico19-test
      type: tico19-test
      args: spa-mar
    metrics:
    - name: BLEU
      type: bleu
      value: 10.0
    - name: chr-F
      type: chrf
      value: 0.41189
  - task:
      name: Translation spa-nep
      type: translation
      args: spa-nep
    dataset:
      name: tico19-test
      type: tico19-test
      args: spa-nep
    metrics:
    - name: BLEU
      type: bleu
      value: 12.1
    - name: chr-F
      type: chrf
      value: 0.47414
  - task:
      name: Translation spa-urd
      type: translation
      args: spa-urd
    dataset:
      name: tico19-test
      type: tico19-test
      args: spa-urd
    metrics:
    - name: BLEU
      type: bleu
      value: 15.6
    - name: chr-F
      type: chrf
      value: 0.44788
  - task:
      name: Translation eng-hin
      type: translation
      args: eng-hin
    dataset:
      name: newstest2014
      type: wmt-2014-news
      args: eng-hin
    metrics:
    - name: BLEU
      type: bleu
      value: 24.0
    - name: chr-F
      type: chrf
      value: 0.51541
  - task:
      name: Translation eng-guj
      type: translation
      args: eng-guj
    dataset:
      name: newstest2019
      type: wmt-2019-news
      args: eng-guj
    metrics:
    - name: BLEU
      type: bleu
      value: 25.7
    - name: chr-F
      type: chrf
      value: 0.57815
---
# opus-mt-tc-bible-big-deu_eng_fra_por_spa-inc

## Table of Contents
- [Model Details](#model-details)
- [Uses](#uses)
- [Risks, Limitations and Biases](#risks-limitations-and-biases)
- [How to Get Started With the Model](#how-to-get-started-with-the-model)
- [Training](#training)
- [Evaluation](#evaluation)
- [Citation Information](#citation-information)
- [Acknowledgements](#acknowledgements)

## Model Details

Neural machine translation model for translating from German, English, French, Portuguese and Spanish (deu+eng+fra+por+spa) to Indic languages (inc).

This model is part of the [OPUS-MT project](https://github.com/Helsinki-NLP/Opus-MT), an effort to make neural machine translation models widely available and accessible for many languages in the world. All models are originally trained using the amazing framework of [Marian NMT](https://marian-nmt.github.io/), an efficient NMT implementation written in pure C++. The models have been converted to PyTorch using the transformers library by Hugging Face. Training data is taken from [OPUS](https://opus.nlpl.eu/) and training pipelines use the procedures of [OPUS-MT-train](https://github.com/Helsinki-NLP/Opus-MT-train).

**Model Description:**
- **Developed by:** Language Technology Research Group at the University of Helsinki
- **Model Type:** Translation (transformer-big)
- **Release**: 2024-05-30
- **License:** Apache-2.0
- **Language(s):**
  - Source Language(s): deu eng fra por spa
  - Target Language(s): anp asm awa ben bho bpy div dty gbm guj hif hin hne hns kas kok lah mag mai mar nep npi ori pan pli rhg rmy rom san sin skr snd syl urd
  - Valid Target Language Labels: >>aee<< >>aeq<< >>anp<< >>anr<< >>asm<< >>awa<< >>bdv<< >>ben<< >>bfb<< >>bfy<< >>bfz<< >>bgc<< >>bgd<< >>bge<< >>bgw<< >>bha<< >>bhb<< >>bhd<< >>bhe<< >>bhi<< >>bho<< >>bht<< >>bhu<< >>bjj<< >>bkk<< >>bmj<< >>bns<< >>bpx<< >>bpy<< >>bra<< >>btv<< >>ccp<< >>cdh<< >>cdi<< >>cdj<< >>cih<< >>clh<< >>ctg<< >>dcc<< >>dhn<< >>dho<< >>div<< >>dmk<< >>dml<< >>doi<< >>dry<< >>dty<< >>dub<< >>duh<< >>dwz<< >>emx<< >>gas<< >>gbk<< >>gbl<< >>gbm<< >>gdx<< >>ggg<< >>ghr<< >>gig<< >>gjk<< >>glh<< >>gra<< >>guj<< >>gwc<< >>gwf<< >>gwt<< >>haj<< >>hca<< >>hif<< >>hif_Latn<< >>hii<< >>hin<< >>hin_Latn<< >>hlb<< >>hne<< >>hns<< >>jdg<< >>jml<< >>jnd<< >>jns<< >>kas<< >>kas_Arab<< >>kas_Deva<< >>kbu<< >>keq<< >>key<< >>kfr<< >>kfs<< >>kft<< >>kfu<< >>kfv<< >>kfx<< >>kfy<< >>khn<< >>khw<< >>kjo<< >>kls<< >>kok<< >>kra<< >>ksy<< >>kvx<< >>kxp<< >>kyw<< >>lah<< >>lbm<< >>lhl<< >>lmn<< >>lss<< >>luv<< >>mag<< >>mai<< >>mar<< >>mby<< >>mjl<< >>mjz<< >>mkb<< >>mke<< >>mki<< >>mvy<< >>mwr<< >>nag<< >>nep<< >>nhh<< >>nli<< >>nlx<< >>noe<< >>noi<< >>npi<< >>odk<< >>omr<< >>ori<< >>ort<< >>pan<< >>pan_Guru<< >>paq<< >>pcl<< >>pgg<< >>phd<< >>phl<< >>pli<< >>plk<< >>plp<< >>pmh<< >>psh<< >>psi<< >>psu<< >>pwr<< >>raj<< >>rei<< >>rhg<< >>rhg_Latn<< >>rjs<< >>rkt<< >>rmi<< >>rmq<< >>rmt<< >>rmy<< >>rom<< >>rtw<< >>san<< >>san_Deva<< >>saz<< >>sbn<< >>sck<< >>scl<< >>sdg<< >>sdr<< >>shd<< >>sin<< >>sjp<< >>skr<< >>smm<< >>smv<< >>snd<< >>snd_Arab<< >>soi<< >>srx<< >>ssi<< >>sts<< >>syl<< >>syl_Sylo<< >>tdb<< >>the<< >>thl<< >>thq<< >>thr<< >>tkb<< >>tkt<< >>tnv<< >>tra<< >>trw<< >>urd<< >>ush<< >>vaa<< >>vah<< >>vas<< >>vav<< >>ved<< >>vgr<< >>wsv<< >>wtm<< >>xka<< >>xxx<<
- **Original Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-inc/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
- **Resources for more information:**
  - [OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/deu%2Beng%2Bfra%2Bpor%2Bspa-inc/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
  - [OPUS-MT-train GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
  - [More information about MarianNMT models in the transformers library](https://huggingface.co/docs/transformers/model_doc/marian)
  - [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge/)
  - [HPLT bilingual data v1 (as part of the Tatoeba Translation Challenge dataset)](https://hplt-project.org/datasets/v1)
  - [A massively parallel Bible corpus](https://aclanthology.org/L14-1215/)

This is a multilingual translation model with multiple target languages. A sentence-initial language token is required in the form of `>>id<<` (id = a valid target language ID), e.g. `>>anp<<`.

## Uses

This model can be used for translation and text-to-text generation.

## Risks, Limitations and Biases

**CONTENT WARNING: Readers should be aware that the model is trained on various public data sets that may contain content that is disturbing, offensive, and can propagate historical and current stereotypes.**

Significant research has explored bias and fairness issues with language models (see, e.g., [Sheng et al. (2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)).

## How to Get Started With the Model

A short code example:

```python
from transformers import MarianMTModel, MarianTokenizer

src_text = [
    ">>anp<< Replace this with text in an accepted source language.",
    ">>urd<< This is the second sentence."
]

model_name = "pytorch-models/opus-mt-tc-bible-big-deu_eng_fra_por_spa-inc"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)
translated = model.generate(**tokenizer(src_text, return_tensors="pt", padding=True))

for t in translated:
    print(tokenizer.decode(t, skip_special_tokens=True))
```

You can also use OPUS-MT models with the transformers pipelines, for example:

```python
from transformers import pipeline
pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-inc")
print(pipe(">>anp<< Replace this with text in an accepted source language."))
```
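Instead of copying target-language tokens from the long list above, they can also be read off the tokenizer itself. A minimal sketch, relying on the `supported_language_codes` property that `MarianTokenizer` provides (the printed output is illustrative):

```python
from transformers import MarianTokenizer

# Same hub ID as in the pipeline example above.
tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-inc")

# MarianTokenizer collects the >>id<< tokens found in the vocabulary;
# any of them can serve as the sentence-initial target-language label.
codes = tokenizer.supported_language_codes
print(len(codes))
print(codes[:5])  # e.g. ['>>aee<<', '>>aeq<<', '>>anp<<', ...]
```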
## Training

- **Data**: opusTCv20230926max50+bt+jhubc ([source](https://github.com/Helsinki-NLP/Tatoeba-Challenge))
- **Pre-processing**: SentencePiece (spm32k,spm32k)
- **Model Type:** transformer-big
- **Original MarianNMT Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-inc/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
- **Training Scripts**: [GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)

## Evaluation

* [Model scores at the OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/deu%2Beng%2Bfra%2Bpor%2Bspa-inc/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
* test set translations: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-inc/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt)
* test set scores: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-inc/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt)
* benchmark results: [benchmark_results.txt](benchmark_results.txt)
* benchmark output: [benchmark_translations.zip](benchmark_translations.zip)
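The chr-F and BLEU numbers below are corpus-level scores of the kind produced by the `sacrebleu` package. As a rough sketch of how such scores can be recomputed from a file of system translations and a matching reference file (the file names here are hypothetical):

```python
import sacrebleu

# Hypothetical files: one sentence per line, hypotheses aligned with references.
with open("hyp.detok.txt", encoding="utf-8") as f:
    hypotheses = [line.rstrip("\n") for line in f]
with open("ref.detok.txt", encoding="utf-8") as f:
    references = [line.rstrip("\n") for line in f]

# sacrebleu expects a list of reference streams (here a single one).
bleu = sacrebleu.corpus_bleu(hypotheses, [references])
chrf = sacrebleu.corpus_chrf(hypotheses, [references])

# sacrebleu reports chrF on a 0-100 scale; the tables here use 0-1.
print(f"BLEU  = {bleu.score:.1f}")
print(f"chr-F = {chrf.score / 100:.5f}")
```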
| langpair | testset | chr-F | BLEU | #sent | #words |
|----------|---------|-------|-------|-------|--------|
| eng-ben | tatoeba-test-v2021-08-07 | 0.48316 | 18.1 | 2500 | 11654 |
| eng-hin | tatoeba-test-v2021-08-07 | 0.52587 | 28.1 | 5000 | 32904 |
| eng-mar | tatoeba-test-v2021-08-07 | 0.52516 | 24.2 | 10396 | 61140 |
| eng-urd | tatoeba-test-v2021-08-07 | 0.46228 | 18.8 | 1663 | 12155 |
| deu-ben | flores101-devtest | 0.44269 | 10.8 | 1012 | 21155 |
| deu-hin | flores101-devtest | 0.48314 | 21.9 | 1012 | 27743 |
| eng-ben | flores101-devtest | 0.51768 | 17.4 | 1012 | 21155 |
| eng-guj | flores101-devtest | 0.54325 | 22.7 | 1012 | 23840 |
| eng-hin | flores101-devtest | 0.58472 | 34.1 | 1012 | 27743 |
| fra-ben | flores101-devtest | 0.44304 | 11.1 | 1012 | 21155 |
| fra-hin | flores101-devtest | 0.48245 | 22.5 | 1012 | 27743 |
| deu-ben | flores200-devtest | 0.44696 | 11.3 | 1012 | 21155 |
| deu-guj | flores200-devtest | 0.40939 | 12.0 | 1012 | 23840 |
| deu-hin | flores200-devtest | 0.48864 | 22.7 | 1012 | 27743 |
| deu-hne | flores200-devtest | 0.43166 | 14.2 | 1012 | 26582 |
| deu-mag | flores200-devtest | 0.43058 | 14.2 | 1012 | 26516 |
| deu-urd | flores200-devtest | 0.41167 | 14.3 | 1012 | 28098 |
| eng-ben | flores200-devtest | 0.52088 | 17.7 | 1012 | 21155 |
| eng-guj | flores200-devtest | 0.54758 | 23.2 | 1012 | 23840 |
| eng-hin | flores200-devtest | 0.58825 | 34.4 | 1012 | 27743 |
| eng-hne | flores200-devtest | 0.46144 | 19.1 | 1012 | 26582 |
| eng-mag | flores200-devtest | 0.50291 | 21.9 | 1012 | 26516 |
| eng-mar | flores200-devtest | 0.49344 | 15.6 | 1012 | 21810 |
| eng-pan | flores200-devtest | 0.45635 | 18.4 | 1012 | 27451 |
| eng-sin | flores200-devtest | 0.45683 | 11.8 | 1012 | 23278 |
| eng-urd | flores200-devtest | 0.48224 | 20.6 | 1012 | 28098 |
| fra-ben | flores200-devtest | 0.44486 | 11.1 | 1012 | 21155 |
| fra-guj | flores200-devtest | 0.41021 | 12.2 | 1012 | 23840 |
| fra-hin | flores200-devtest | 0.48632 | 22.7 | 1012 | 27743 |
| fra-hne | flores200-devtest | 0.42777 | 13.8 | 1012 | 26582 |
| fra-mag | flores200-devtest | 0.42725 | 14.3 | 1012 | 26516 |
| fra-urd | flores200-devtest | 0.40901 | 13.6 | 1012 | 28098 |
| por-ben | flores200-devtest | 0.43877 | 10.7 | 1012 | 21155 |
| por-hin | flores200-devtest | 0.50121 | 23.9 | 1012 | 27743 |
| por-hne | flores200-devtest | 0.42270 | 14.1 | 1012 | 26582 |
| por-mag | flores200-devtest | 0.42146 | 13.7 | 1012 | 26516 |
| por-san | flores200-devtest | 0.09879 | 0.4 | 1012 | 18253 |
| por-urd | flores200-devtest | 0.41225 | 14.5 | 1012 | 28098 |
| spa-ben | flores200-devtest | 0.42040 | 8.8 | 1012 | 21155 |
| spa-hin | flores200-devtest | 0.43977 | 16.4 | 1012 | 27743 |
| eng-hin | newstest2014 | 0.51541 | 24.0 | 2507 | 60872 |
| eng-guj | newstest2019 | 0.57815 | 25.7 | 998 | 21924 |
| deu-ben | ntrex128 | 0.44384 | 9.9 | 1997 | 40095 |
| deu-hin | ntrex128 | 0.43252 | 17.0 | 1997 | 55219 |
| deu-urd | ntrex128 | 0.41844 | 14.8 | 1997 | 54259 |
| eng-ben | ntrex128 | 0.52381 | 17.3 | 1997 | 40095 |
| eng-guj | ntrex128 | 0.49386 | 17.2 | 1997 | 45335 |
| eng-hin | ntrex128 | 0.52696 | 27.4 | 1997 | 55219 |
| eng-mar | ntrex128 | 0.45244 | 10.8 | 1997 | 42375 |
| eng-nep | ntrex128 | 0.43339 | 8.8 | 1997 | 40570 |
| eng-pan | ntrex128 | 0.46534 | 19.5 | 1997 | 54355 |
| eng-sin | ntrex128 | 0.44124 | 10.5 | 1997 | 44429 |
| eng-urd | ntrex128 | 0.50060 | 22.4 | 1997 | 54259 |
| fra-ben | ntrex128 | 0.42857 | 9.4 | 1997 | 40095 |
| fra-hin | ntrex128 | 0.42777 | 17.4 | 1997 | 55219 |
| fra-urd | ntrex128 | 0.41229 | 14.3 | 1997 | 54259 |
| por-ben | ntrex128 | 0.44134 | 10.1 | 1997 | 40095 |
| por-hin | ntrex128 | 0.43461 | 17.7 | 1997 | 55219 |
| por-urd | ntrex128 | 0.41777 | 14.5 | 1997 | 54259 |
| spa-ben | ntrex128 | 0.45329 | 10.6 | 1997 | 40095 |
| spa-hin | ntrex128 | 0.43747 | 17.9 | 1997 | 55219 |
| spa-urd | ntrex128 | 0.41929 | 14.6 | 1997 | 54259 |
| eng-ben | tico19-test | 0.51850 | 18.6 | 2100 | 51695 |
| eng-hin | tico19-test | 0.62999 | 41.9 | 2100 | 62680 |
| eng-mar | tico19-test | 0.45968 | 13.0 | 2100 | 50872 |
| eng-nep | tico19-test | 0.54373 | 18.7 | 2100 | 48363 |
| eng-urd | tico19-test | 0.50920 | 21.7 | 2100 | 65312 |
| fra-hin | tico19-test | 0.48666 | 25.6 | 2100 | 62680 |
| fra-nep | tico19-test | 0.41414 | 10.0 | 2100 | 48363 |
| por-ben | tico19-test | 0.45609 | 12.7 | 2100 | 51695 |
| por-hin | tico19-test | 0.55530 | 31.2 | 2100 | 62680 |
| por-mar | tico19-test | 0.40344 | 9.7 | 2100 | 50872 |
| por-nep | tico19-test | 0.47698 | 12.4 | 2100 | 48363 |
| por-urd | tico19-test | 0.44747 | 15.6 | 2100 | 65312 |
| spa-ben | tico19-test | 0.46418 | 13.3 | 2100 | 51695 |
| spa-hin | tico19-test | 0.55526 | 31.0 | 2100 | 62680 |
| spa-mar | tico19-test | 0.41189 | 10.0 | 2100 | 50872 |
| spa-nep | tico19-test | 0.47414 | 12.1 | 2100 | 48363 |
| spa-urd | tico19-test | 0.44788 | 15.6 | 2100 | 65312 |

## Citation Information

* Publications: [Democratizing neural machine translation with OPUS-MT](https://doi.org/10.1007/s10579-023-09704-w) and [OPUS-MT – Building open translation services for the World](https://aclanthology.org/2020.eamt-1.61/) and [The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual MT](https://aclanthology.org/2020.wmt-1.139/) (Please cite if you use this model.)

```bibtex
@article{tiedemann2023democratizing,
  title={Democratizing neural machine translation with {OPUS-MT}},
  author={Tiedemann, J{\"o}rg and Aulamo, Mikko and Bakshandaeva, Daria and Boggia, Michele and Gr{\"o}nroos, Stig-Arne and Nieminen, Tommi and Raganato, Alessandro and Scherrer, Yves and Vazquez, Raul and Virpioja, Sami},
  journal={Language Resources and Evaluation},
  number={58},
  pages={713--755},
  year={2023},
  publisher={Springer Nature},
  issn={1574-0218},
  doi={10.1007/s10579-023-09704-w}
}

@inproceedings{tiedemann-thottingal-2020-opus,
  title = "{OPUS}-{MT} {--} Building open translation services for the World",
  author = {Tiedemann, J{\"o}rg and Thottingal, Santhosh},
  booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
  month = nov,
  year = "2020",
  address = "Lisboa, Portugal",
  publisher = "European Association for Machine Translation",
  url = "https://aclanthology.org/2020.eamt-1.61",
  pages = "479--480",
}

@inproceedings{tiedemann-2020-tatoeba,
  title = "The Tatoeba Translation Challenge {--} Realistic Data Sets for Low Resource and Multilingual {MT}",
  author = {Tiedemann, J{\"o}rg},
  booktitle = "Proceedings of the Fifth Conference on Machine Translation",
  month = nov,
  year = "2020",
  address = "Online",
  publisher = "Association for Computational Linguistics",
  url = "https://aclanthology.org/2020.wmt-1.139",
  pages = "1174--1182",
}
```

## Acknowledgements

The work is supported by the [HPLT project](https://hplt-project.org/), funded by the European Union’s Horizon Europe research and innovation programme under grant agreement No 101070350. We are also grateful for the generous computational resources and IT infrastructure provided by [CSC -- IT Center for Science](https://www.csc.fi/), Finland, and the [EuroHPC supercomputer LUMI](https://www.lumi-supercomputer.eu/).

## Model conversion info

* transformers version: 4.45.1
* OPUS-MT git hash: 0882077
* port time: Tue Oct 8 10:09:07 EEST 2024
* port machine: LM0-400-22516.local
benchmark_results.txt ADDED
@@ -0,0 +1,176 @@
multi-multi tatoeba-test-v2020-07-28-v2023-09-26 0.49252 21.9 10000 60042
deu-ben flores101-devtest 0.44269 10.8 1012 21155
deu-hin flores101-devtest 0.48314 21.9 1012 27743
deu-mar flores101-devtest 0.39479 9.2 1012 21810
deu-npi flores101-devtest 0.15099 0.2 1012 19762
eng-ben flores101-devtest 0.51768 17.4 1012 21155
eng-guj flores101-devtest 0.54325 22.7 1012 23840
eng-hin flores101-devtest 0.58472 34.1 1012 27743
fra-ben flores101-devtest 0.44304 11.1 1012 21155
fra-hin flores101-devtest 0.48245 22.5 1012 27743
fra-npi flores101-devtest 0.12216 0.2 1012 19762
spa-guj flores101-devtest 0.37550 8.3 1012 23840
deu-asm flores200-devtest 0.23161 2.6 1012 21028
deu-awa flores200-devtest 0.26194 5.0 1012 26642
deu-ben flores200-devtest 0.44696 11.3 1012 21155
deu-bho flores200-devtest 0.34690 8.8 1012 27914
deu-guj flores200-devtest 0.40939 12.0 1012 23840
deu-hin flores200-devtest 0.48864 22.7 1012 27743
deu-hne flores200-devtest 0.43166 14.2 1012 26582
deu-kas_Arab flores200-devtest 0.16579 0.5 1012 23514
deu-kas_Deva flores200-devtest 0.12989 0.6 1012 26371
deu-mag flores200-devtest 0.43058 14.2 1012 26516
deu-mai flores200-devtest 0.38967 8.1 1012 25999
deu-mar flores200-devtest 0.39856 9.3 1012 21810
deu-npi flores200-devtest 0.12740 0.2 1012 19762
deu-pan flores200-devtest 0.36795 10.5 1012 27451
deu-san flores200-devtest 0.12652 0.5 1012 18253
deu-sin flores200-devtest 0.37977 7.2 1012 23278
deu-urd flores200-devtest 0.41167 14.3 1012 28098
eng-asm flores200-devtest 0.36298 6.1 1012 21028
eng-awa flores200-devtest 0.17420 2.7 1012 26642
eng-ben flores200-devtest 0.52088 17.7 1012 21155
eng-bho flores200-devtest 0.37333 11.6 1012 27914
eng-guj flores200-devtest 0.54758 23.2 1012 23840
eng-hin flores200-devtest 0.58825 34.4 1012 27743
eng-hne flores200-devtest 0.46144 19.1 1012 26582
eng-kas_Arab flores200-devtest 0.12804 0.5 1012 23514
eng-kas_Deva flores200-devtest 0.14226 1.0 1012 26371
eng-mag flores200-devtest 0.50291 21.9 1012 26516
eng-mai flores200-devtest 0.39362 9.7 1012 25999
eng-mar flores200-devtest 0.49344 15.6 1012 21810
eng-npi flores200-devtest 0.18868 0.3 1012 19762
eng-pan flores200-devtest 0.45635 18.4 1012 27451
eng-san flores200-devtest 0.13260 0.8 1012 18253
eng-sin flores200-devtest 0.45683 11.8 1012 23278
eng-urd flores200-devtest 0.48224 20.6 1012 28098
fra-asm flores200-devtest 0.24043 2.7 1012 21028
fra-awa flores200-devtest 0.26156 4.9 1012 26642
fra-ben flores200-devtest 0.44486 11.1 1012 21155
fra-bho flores200-devtest 0.34441 9.0 1012 27914
fra-guj flores200-devtest 0.41021 12.2 1012 23840
fra-hin flores200-devtest 0.48632 22.7 1012 27743
fra-hne flores200-devtest 0.42777 13.8 1012 26582
fra-kas_Arab flores200-devtest 0.16142 0.4 1012 23514
fra-kas_Deva flores200-devtest 0.12849 0.7 1012 26371
fra-mag flores200-devtest 0.42725 14.3 1012 26516
fra-mai flores200-devtest 0.39179 8.5 1012 25999
fra-mar flores200-devtest 0.38985 9.5 1012 21810
fra-npi flores200-devtest 0.12358 0.1 1012 19762
fra-pan flores200-devtest 0.36902 10.6 1012 27451
fra-san flores200-devtest 0.10558 0.3 1012 18253
fra-sin flores200-devtest 0.38581 7.3 1012 23278
fra-urd flores200-devtest 0.40901 13.6 1012 28098
por-asm flores200-devtest 0.25566 3.1 1012 21028
por-awa flores200-devtest 0.23673 4.0 1012 26642
por-ben flores200-devtest 0.43877 10.7 1012 21155
por-bho flores200-devtest 0.34736 9.2 1012 27914
por-guj flores200-devtest 0.38225 10.9 1012 23840
por-hin flores200-devtest 0.50121 23.9 1012 27743
por-hne flores200-devtest 0.42270 14.1 1012 26582
por-kas_Arab flores200-devtest 0.15653 0.4 1012 23514
por-kas_Deva flores200-devtest 0.12836 0.7 1012 26371
por-mag flores200-devtest 0.42146 13.7 1012 26516
por-mai flores200-devtest 0.38341 7.7 1012 25999
por-mar flores200-devtest 0.37814 8.6 1012 21810
por-npi flores200-devtest 0.12482 0.1 1012 19762
por-pan flores200-devtest 0.34711 9.2 1012 27451
por-san flores200-devtest 9.879 0.4 1012 18253
por-sin flores200-devtest 0.38140 7.4 1012 23278
por-urd flores200-devtest 0.41225 14.5 1012 28098
spa-asm flores200-devtest 0.24228 2.1 1012 21028
spa-awa flores200-devtest 0.24287 3.8 1012 26642
spa-ben flores200-devtest 0.42040 8.8 1012 21155
spa-bho flores200-devtest 0.33628 8.0 1012 27914
spa-guj flores200-devtest 0.37414 8.1 1012 23840
spa-hin flores200-devtest 0.43977 16.4 1012 27743
spa-hne flores200-devtest 0.39555 10.8 1012 26582
spa-kas_Arab flores200-devtest 0.15572 0.4 1012 23514
spa-kas_Deva flores200-devtest 0.12956 0.6 1012 26371
spa-mag flores200-devtest 0.39621 11.1 1012 26516
spa-mai flores200-devtest 0.36462 6.4 1012 25999
spa-mar flores200-devtest 0.35370 6.5 1012 21810
spa-npi flores200-devtest 0.12237 0.1 1012 19762
spa-pan flores200-devtest 0.33808 7.6 1012 27451
spa-san flores200-devtest 0.11964 0.3 1012 18253
spa-sin flores200-devtest 0.36322 5.9 1012 23278
spa-urd flores200-devtest 0.37993 10.8 1012 28098
eng-hin newstest2014 0.51541 24.0 2507 60872
eng-guj newstest2019 0.57815 25.7 998 21924
deu-ben ntrex128 0.44384 9.9 1997 40095
deu-div ntrex128 0.18948 0.0 1997 37802
deu-guj ntrex128 0.38060 8.8 1997 45335
deu-hin ntrex128 0.43252 17.0 1997 55219
deu-mar ntrex128 0.36605 6.3 1997 42375
deu-nep ntrex128 0.36728 5.4 1997 40570
deu-pan ntrex128 0.36448 10.2 1997 54355
deu-sin ntrex128 0.37092 6.3 1997 44429
deu-snd_Arab ntrex128 0.248 0.0 1997 49866
deu-urd ntrex128 0.41844 14.8 1997 54259
eng-ben ntrex128 0.52381 17.3 1997 40095
eng-div ntrex128 0.17944 0.1 1997 37802
eng-guj ntrex128 0.49386 17.2 1997 45335
eng-hin ntrex128 0.52696 27.4 1997 55219
eng-mar ntrex128 0.45244 10.8 1997 42375
eng-nep ntrex128 0.43339 8.8 1997 40570
eng-pan ntrex128 0.46534 19.5 1997 54355
eng-sin ntrex128 0.44124 10.5 1997 44429
eng-snd_Arab ntrex128 0.292 0.0 1997 49866
eng-urd ntrex128 0.50060 22.4 1997 54259
fra-ben ntrex128 0.42857 9.4 1997 40095
fra-div ntrex128 0.18599 0.1 1997 37802
fra-guj ntrex128 0.37700 8.6 1997 45335
fra-hin ntrex128 0.42777 17.4 1997 55219
fra-mar ntrex128 0.35860 6.3 1997 42375
fra-nep ntrex128 0.36110 5.4 1997 40570
fra-pan ntrex128 0.35805 9.9 1997 54355
fra-sin ntrex128 0.36801 6.5 1997 44429
fra-snd_Arab ntrex128 0.236 0.0 1997 49866
fra-urd ntrex128 0.41229 14.3 1997 54259
por-ben ntrex128 0.44134 10.1 1997 40095
por-div ntrex128 0.18986 0.1 1997 37802
por-guj ntrex128 0.36989 8.4 1997 45335
por-hin ntrex128 0.43461 17.7 1997 55219
por-mar ntrex128 0.35130 6.2 1997 42375
por-nep ntrex128 0.37236 5.7 1997 40570
por-pan ntrex128 0.34258 8.7 1997 54355
por-sin ntrex128 0.37211 6.5 1997 44429
por-snd_Arab ntrex128 0.220 0.0 1997 49866
por-urd ntrex128 0.41777 14.5 1997 54259
spa-ben ntrex128 0.45329 10.6 1997 40095
spa-div ntrex128 0.18962 0.1 1997 37802
spa-guj ntrex128 0.38830 9.0 1997 45335
spa-hin ntrex128 0.43747 17.9 1997 55219
spa-mar ntrex128 0.35972 6.4 1997 42375
spa-nep ntrex128 0.37714 5.8 1997 40570
spa-pan ntrex128 0.36716 10.2 1997 54355
spa-sin ntrex128 0.38361 7.0 1997 44429
spa-snd_Arab ntrex128 0.212 0.0 1997 49866
spa-urd ntrex128 0.41929 14.6 1997 54259
eng-rom tatoeba-test-v2020-07-28 0.21188 2.3 671 4974
eng-awa tatoeba-test-v2021-08-07 0.17609 2.3 279 1148
eng-ben tatoeba-test-v2021-08-07 0.48316 18.1 2500 11654
eng-hin tatoeba-test-v2021-08-07 0.52587 28.1 5000 32904
eng-mar tatoeba-test-v2021-08-07 0.52516 24.2 10396 61140
eng-rom tatoeba-test-v2021-08-07 0.21957 2.2 706 5222
eng-urd tatoeba-test-v2021-08-07 0.46228 18.8 1663 12155
eng-ben tico19-test 0.51850 18.6 2100 51695
eng-hin tico19-test 0.62999 41.9 2100 62680
eng-mar tico19-test 0.45968 13.0 2100 50872
eng-nep tico19-test 0.54373 18.7 2100 48363
eng-urd tico19-test 0.50920 21.7 2100 65312
fra-ben tico19-test 0.39629 9.7 2100 51695
fra-hin tico19-test 0.48666 25.6 2100 62680
fra-mar tico19-test 0.36352 8.1 2100 50872
fra-nep tico19-test 0.41414 10.0 2100 48363
fra-urd tico19-test 0.39479 13.4 2100 65312
por-ben tico19-test 0.45609 12.7 2100 51695
por-hin tico19-test 0.55530 31.2 2100 62680
por-mar tico19-test 0.40344 9.7 2100 50872
por-nep tico19-test 0.47698 12.4 2100 48363
por-urd tico19-test 0.44747 15.6 2100 65312
spa-ben tico19-test 0.46418 13.3 2100 51695
spa-hin tico19-test 0.55526 31.0 2100 62680
spa-mar tico19-test 0.41189 10.0 2100 50872
spa-nep tico19-test 0.47414 12.1 2100 48363
spa-urd tico19-test 0.44788 15.6 2100 65312
benchmark_translations.zip ADDED
File without changes
config.json ADDED
@@ -0,0 +1,41 @@
{
  "_name_or_path": "pytorch-models/opus-mt-tc-bible-big-deu_eng_fra_por_spa-inc",
  "activation_dropout": 0.0,
  "activation_function": "relu",
  "architectures": [
    "MarianMTModel"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 61905,
  "decoder_vocab_size": 61906,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 512,
  "forced_eos_token_id": null,
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "max_length": null,
  "max_position_embeddings": 1024,
  "model_type": "marian",
  "normalize_embedding": false,
  "num_beams": null,
  "num_hidden_layers": 6,
  "pad_token_id": 61905,
  "scale_embedding": true,
  "share_encoder_decoder_embeddings": true,
  "static_position_embeddings": true,
  "torch_dtype": "float32",
  "transformers_version": "4.45.1",
  "use_cache": true,
  "vocab_size": 61906
}
generation_config.json ADDED
@@ -0,0 +1,16 @@
{
  "_from_model_config": true,
  "bad_words_ids": [
    [
      61905
    ]
  ],
  "bos_token_id": 0,
  "decoder_start_token_id": 61905,
  "eos_token_id": 512,
  "forced_eos_token_id": 512,
  "max_length": 512,
  "num_beams": 4,
  "pad_token_id": 61905,
  "transformers_version": "4.45.1"
}
model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f47a0dfde3088a518b94a468d65064cbd2e0358b965575153e2d72a5829d6880
size 959273720
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1376812ebcba33493b0459c4ed414d2607009ebac2d1cdb0d9e067d2c74d867f
size 959324997
source.spm ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:beeeadf98e7c1cfb3609db2af734a2aad332091428d2c7d6d875073b31fdfd95
size 802420
special_tokens_map.json ADDED
@@ -0,0 +1 @@
{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
target.spm ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6c7d11c29930951f01ab91b858356808d171afc5cb276211349c2706b09c576b
size 967595
tokenizer_config.json ADDED
@@ -0,0 +1 @@
{"source_lang": "deu+eng+fra+por+spa", "target_lang": "inc", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "sp_model_kwargs": {}, "separate_vocabs": false, "special_tokens_map_file": null, "name_or_path": "marian-models/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30/deu+eng+fra+por+spa-inc", "tokenizer_class": "MarianTokenizer"}
vocab.json ADDED
The diff for this file is too large to render.