tiedeman committed
Commit 927b1c3
1 Parent(s): a1920c9

Initial commit

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.spm filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,1587 @@
---
library_name: transformers
language:
- aa
- am
- ar
- arc
- bcw
- byn
- cop
- daa
- de
- dsh
- en
- es
- fr
- gde
- gnd
- ha
- hbo
- he
- hig
- irk
- jpa
- kab
- ker
- kqp
- ktb
- kxc
- lln
- lme
- meq
- mfh
- mfi
- mfk
- mif
- mpg
- mqb
- mt
- muy
- oar
- om
- pbi
- phn
- pt
- rif
- sgw
- shi
- shy
- so
- sur
- syc
- syr
- taq
- thv
- ti
- tig
- tmc
- tmh
- tmr
- ttr
- tzm
- wal
- xed
- zgh

tags:
- translation
- opus-mt-tc-bible

license: apache-2.0
model-index:
- name: opus-mt-tc-bible-big-afa-deu_eng_fra_por_spa
  results:
  - task:
      name: Translation acm-deu
      type: translation
      args: acm-deu
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: acm-deu
    metrics:
    - name: BLEU
      type: bleu
      value: 17.6
    - name: chr-F
      type: chrf
      value: 0.48947
  - task:
      name: Translation acm-eng
      type: translation
      args: acm-eng
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: acm-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 28.5
    - name: chr-F
      type: chrf
      value: 0.56799
  - task:
      name: Translation acm-fra
      type: translation
      args: acm-fra
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: acm-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 26.1
    - name: chr-F
      type: chrf
      value: 0.53577
  - task:
      name: Translation acm-por
      type: translation
      args: acm-por
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: acm-por
    metrics:
    - name: BLEU
      type: bleu
      value: 23.9
    - name: chr-F
      type: chrf
      value: 0.52441
  - task:
      name: Translation acm-spa
      type: translation
      args: acm-spa
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: acm-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 18.2
    - name: chr-F
      type: chrf
      value: 0.46985
  - task:
      name: Translation amh-deu
      type: translation
      args: amh-deu
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: amh-deu
    metrics:
    - name: BLEU
      type: bleu
      value: 12.6
    - name: chr-F
      type: chrf
      value: 0.41553
  - task:
      name: Translation amh-eng
      type: translation
      args: amh-eng
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: amh-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 22.5
    - name: chr-F
      type: chrf
      value: 0.49333
  - task:
      name: Translation amh-fra
      type: translation
      args: amh-fra
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: amh-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 17.8
    - name: chr-F
      type: chrf
      value: 0.44890
  - task:
      name: Translation amh-por
      type: translation
      args: amh-por
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: amh-por
    metrics:
    - name: BLEU
      type: bleu
      value: 16.5
    - name: chr-F
      type: chrf
      value: 0.43771
  - task:
      name: Translation amh-spa
      type: translation
      args: amh-spa
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: amh-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 12.8
    - name: chr-F
      type: chrf
      value: 0.39526
  - task:
      name: Translation apc-deu
      type: translation
      args: apc-deu
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: apc-deu
    metrics:
    - name: BLEU
      type: bleu
      value: 16.0
    - name: chr-F
      type: chrf
      value: 0.47480
  - task:
      name: Translation apc-eng
      type: translation
      args: apc-eng
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: apc-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 28.1
    - name: chr-F
      type: chrf
      value: 0.56075
  - task:
      name: Translation apc-fra
      type: translation
      args: apc-fra
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: apc-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 24.6
    - name: chr-F
      type: chrf
      value: 0.52325
  - task:
      name: Translation apc-por
      type: translation
      args: apc-por
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: apc-por
    metrics:
    - name: BLEU
      type: bleu
      value: 22.9
    - name: chr-F
      type: chrf
      value: 0.51055
  - task:
      name: Translation apc-spa
      type: translation
      args: apc-spa
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: apc-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 17.2
    - name: chr-F
      type: chrf
      value: 0.45634
  - task:
      name: Translation arz-deu
      type: translation
      args: arz-deu
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: arz-deu
    metrics:
    - name: BLEU
      type: bleu
      value: 14.1
    - name: chr-F
      type: chrf
      value: 0.45844
  - task:
      name: Translation arz-eng
      type: translation
      args: arz-eng
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: arz-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 22.7
    - name: chr-F
      type: chrf
      value: 0.52534
  - task:
      name: Translation arz-fra
      type: translation
      args: arz-fra
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: arz-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 21.8
    - name: chr-F
      type: chrf
      value: 0.50336
  - task:
      name: Translation arz-por
      type: translation
      args: arz-por
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: arz-por
    metrics:
    - name: BLEU
      type: bleu
      value: 20.0
    - name: chr-F
      type: chrf
      value: 0.48741
  - task:
      name: Translation arz-spa
      type: translation
      args: arz-spa
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: arz-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 15.8
    - name: chr-F
      type: chrf
      value: 0.44516
  - task:
      name: Translation hau-eng
      type: translation
      args: hau-eng
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: hau-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 23.4
    - name: chr-F
      type: chrf
      value: 0.48137
  - task:
      name: Translation hau-fra
      type: translation
      args: hau-fra
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: hau-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 17.2
    - name: chr-F
      type: chrf
      value: 0.42981
  - task:
      name: Translation hau-por
      type: translation
      args: hau-por
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: hau-por
    metrics:
    - name: BLEU
      type: bleu
      value: 15.7
    - name: chr-F
      type: chrf
      value: 0.41385
  - task:
      name: Translation hau-spa
      type: translation
      args: hau-spa
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: hau-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 11.7
    - name: chr-F
      type: chrf
      value: 0.37800
  - task:
      name: Translation heb-deu
      type: translation
      args: heb-deu
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: heb-deu
    metrics:
    - name: BLEU
      type: bleu
      value: 22.8
    - name: chr-F
      type: chrf
      value: 0.53482
  - task:
      name: Translation heb-eng
      type: translation
      args: heb-eng
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: heb-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 38.0
    - name: chr-F
      type: chrf
      value: 0.63368
  - task:
      name: Translation heb-fra
      type: translation
      args: heb-fra
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: heb-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 32.6
    - name: chr-F
      type: chrf
      value: 0.58417
  - task:
      name: Translation heb-por
      type: translation
      args: heb-por
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: heb-por
    metrics:
    - name: BLEU
      type: bleu
      value: 30.7
    - name: chr-F
      type: chrf
      value: 0.57140
  - task:
      name: Translation kab-eng
      type: translation
      args: kab-eng
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: kab-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 10.0
    - name: chr-F
      type: chrf
      value: 0.32121
  - task:
      name: Translation mlt-eng
      type: translation
      args: mlt-eng
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: mlt-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 51.1
    - name: chr-F
      type: chrf
      value: 0.73415
  - task:
      name: Translation mlt-fra
      type: translation
      args: mlt-fra
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: mlt-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 35.8
    - name: chr-F
      type: chrf
      value: 0.61626
  - task:
      name: Translation mlt-spa
      type: translation
      args: mlt-spa
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: mlt-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 21.8
    - name: chr-F
      type: chrf
      value: 0.50534
  - task:
      name: Translation som-eng
      type: translation
      args: som-eng
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: som-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 17.7
    - name: chr-F
      type: chrf
      value: 0.42764
  - task:
      name: Translation som-fra
      type: translation
      args: som-fra
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: som-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 14.4
    - name: chr-F
      type: chrf
      value: 0.39536
  - task:
      name: Translation som-por
      type: translation
      args: som-por
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: som-por
    metrics:
    - name: BLEU
      type: bleu
      value: 14.1
    - name: chr-F
      type: chrf
      value: 0.39440
  - task:
      name: Translation som-spa
      type: translation
      args: som-spa
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: som-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 10.0
    - name: chr-F
      type: chrf
      value: 0.35321
  - task:
      name: Translation tir-eng
      type: translation
      args: tir-eng
    dataset:
      name: flores200-devtest
      type: flores200-devtest
      args: tir-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 11.6
    - name: chr-F
      type: chrf
      value: 0.37119
  - task:
      name: Translation amh-deu
      type: translation
      args: amh-deu
    dataset:
      name: flores101-devtest
      type: flores_101
      args: amh deu devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 11.5
    - name: chr-F
      type: chrf
      value: 0.39890
  - task:
      name: Translation amh-eng
      type: translation
      args: amh-eng
    dataset:
      name: flores101-devtest
      type: flores_101
      args: amh eng devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 21.0
    - name: chr-F
      type: chrf
      value: 0.47357
  - task:
      name: Translation amh-fra
      type: translation
      args: amh-fra
    dataset:
      name: flores101-devtest
      type: flores_101
      args: amh fra devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 16.2
    - name: chr-F
      type: chrf
      value: 0.43155
  - task:
      name: Translation amh-por
      type: translation
      args: amh-por
    dataset:
      name: flores101-devtest
      type: flores_101
      args: amh por devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 15.1
    - name: chr-F
      type: chrf
      value: 0.42109
  - task:
      name: Translation amh-spa
      type: translation
      args: amh-spa
    dataset:
      name: flores101-devtest
      type: flores_101
      args: amh spa devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 11.5
    - name: chr-F
      type: chrf
      value: 0.38003
  - task:
      name: Translation ara-deu
      type: translation
      args: ara-deu
    dataset:
      name: flores101-devtest
      type: flores_101
      args: ara deu devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 20.4
    - name: chr-F
      type: chrf
      value: 0.51110
  - task:
      name: Translation ara-fra
      type: translation
      args: ara-fra
    dataset:
      name: flores101-devtest
      type: flores_101
      args: ara fra devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 29.7
    - name: chr-F
      type: chrf
      value: 0.56934
  - task:
      name: Translation ara-por
      type: translation
      args: ara-por
    dataset:
      name: flores101-devtest
      type: flores_101
      args: ara por devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 28.2
    - name: chr-F
      type: chrf
      value: 0.55727
  - task:
      name: Translation ara-spa
      type: translation
      args: ara-spa
    dataset:
      name: flores101-devtest
      type: flores_101
      args: ara spa devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 19.5
    - name: chr-F
      type: chrf
      value: 0.48350
  - task:
      name: Translation hau-eng
      type: translation
      args: hau-eng
    dataset:
      name: flores101-devtest
      type: flores_101
      args: hau eng devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 21.6
    - name: chr-F
      type: chrf
      value: 0.46804
  - task:
      name: Translation hau-fra
      type: translation
      args: hau-fra
    dataset:
      name: flores101-devtest
      type: flores_101
      args: hau fra devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 15.9
    - name: chr-F
      type: chrf
      value: 0.41827
  - task:
      name: Translation hau-spa
      type: translation
      args: hau-spa
    dataset:
      name: flores101-devtest
      type: flores_101
      args: hau spa devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 11.5
    - name: chr-F
      type: chrf
      value: 0.37201
  - task:
      name: Translation heb-eng
      type: translation
      args: heb-eng
    dataset:
      name: flores101-devtest
      type: flores_101
      args: heb eng devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 36.6
    - name: chr-F
      type: chrf
      value: 0.62422
  - task:
      name: Translation mlt-eng
      type: translation
      args: mlt-eng
    dataset:
      name: flores101-devtest
      type: flores_101
      args: mlt eng devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 49.1
    - name: chr-F
      type: chrf
      value: 0.72390
  - task:
      name: Translation mlt-fra
      type: translation
      args: mlt-fra
    dataset:
      name: flores101-devtest
      type: flores_101
      args: mlt fra devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 34.7
    - name: chr-F
      type: chrf
      value: 0.60840
  - task:
      name: Translation mlt-por
      type: translation
      args: mlt-por
    dataset:
      name: flores101-devtest
      type: flores_101
      args: mlt por devtest
    metrics:
    - name: BLEU
      type: bleu
      value: 31.8
    - name: chr-F
      type: chrf
      value: 0.59863
  - task:
      name: Translation amh-eng
      type: translation
      args: amh-eng
    dataset:
      name: ntrex128
      type: ntrex128
      args: amh-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 15.0
    - name: chr-F
      type: chrf
      value: 0.42042
  - task:
      name: Translation amh-fra
      type: translation
      args: amh-fra
    dataset:
      name: ntrex128
      type: ntrex128
      args: amh-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 10.8
    - name: chr-F
      type: chrf
      value: 0.37274
  - task:
      name: Translation amh-spa
      type: translation
      args: amh-spa
    dataset:
      name: ntrex128
      type: ntrex128
      args: amh-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 12.1
    - name: chr-F
      type: chrf
      value: 0.38306
  - task:
      name: Translation hau-eng
      type: translation
      args: hau-eng
    dataset:
      name: ntrex128
      type: ntrex128
      args: hau-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 26.1
    - name: chr-F
      type: chrf
      value: 0.50349
  - task:
      name: Translation hau-fra
      type: translation
      args: hau-fra
    dataset:
      name: ntrex128
      type: ntrex128
      args: hau-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 15.8
    - name: chr-F
      type: chrf
      value: 0.41837
  - task:
      name: Translation hau-por
      type: translation
      args: hau-por
    dataset:
      name: ntrex128
      type: ntrex128
      args: hau-por
    metrics:
    - name: BLEU
      type: bleu
      value: 15.3
    - name: chr-F
      type: chrf
      value: 0.40851
  - task:
      name: Translation hau-spa
      type: translation
      args: hau-spa
    dataset:
      name: ntrex128
      type: ntrex128
      args: hau-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 18.5
    - name: chr-F
      type: chrf
      value: 0.43376
  - task:
      name: Translation heb-deu
      type: translation
      args: heb-deu
    dataset:
      name: ntrex128
      type: ntrex128
      args: heb-deu
    metrics:
    - name: BLEU
      type: bleu
      value: 17.7
    - name: chr-F
      type: chrf
      value: 0.49482
  - task:
      name: Translation heb-eng
      type: translation
      args: heb-eng
    dataset:
      name: ntrex128
      type: ntrex128
      args: heb-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 31.3
    - name: chr-F
      type: chrf
      value: 0.59241
  - task:
      name: Translation heb-fra
      type: translation
      args: heb-fra
    dataset:
      name: ntrex128
      type: ntrex128
      args: heb-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 24.0
    - name: chr-F
      type: chrf
      value: 0.52180
  - task:
      name: Translation heb-por
      type: translation
      args: heb-por
    dataset:
      name: ntrex128
      type: ntrex128
      args: heb-por
    metrics:
    - name: BLEU
      type: bleu
      value: 23.2
    - name: chr-F
      type: chrf
      value: 0.51248
  - task:
      name: Translation mlt-spa
      type: translation
      args: mlt-spa
    dataset:
      name: ntrex128
      type: ntrex128
      args: mlt-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 30.9
    - name: chr-F
      type: chrf
      value: 0.57078
  - task:
      name: Translation som-deu
      type: translation
      args: som-deu
    dataset:
      name: ntrex128
      type: ntrex128
      args: som-deu
    metrics:
    - name: BLEU
      type: bleu
      value: 11.0
    - name: chr-F
      type: chrf
      value: 0.39394
  - task:
      name: Translation som-eng
      type: translation
      args: som-eng
    dataset:
      name: ntrex128
      type: ntrex128
      args: som-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 24.3
    - name: chr-F
      type: chrf
      value: 0.49187
  - task:
      name: Translation som-fra
      type: translation
      args: som-fra
    dataset:
      name: ntrex128
      type: ntrex128
      args: som-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 15.1
    - name: chr-F
      type: chrf
      value: 0.41236
  - task:
      name: Translation som-por
      type: translation
      args: som-por
    dataset:
      name: ntrex128
      type: ntrex128
      args: som-por
    metrics:
    - name: BLEU
      type: bleu
      value: 15.2
    - name: chr-F
      type: chrf
      value: 0.41550
  - task:
      name: Translation som-spa
      type: translation
      args: som-spa
    dataset:
      name: ntrex128
      type: ntrex128
      args: som-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 17.6
    - name: chr-F
      type: chrf
      value: 0.43278
  - task:
      name: Translation tir-eng
      type: translation
      args: tir-eng
    dataset:
      name: ntrex128
      type: ntrex128
      args: tir-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 12.5
    - name: chr-F
      type: chrf
      value: 0.37185
  - task:
      name: Translation tir-spa
      type: translation
      args: tir-spa
    dataset:
      name: ntrex128
      type: ntrex128
      args: tir-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 10.2
    - name: chr-F
      type: chrf
      value: 0.34985
  - task:
      name: Translation amh-eng
      type: translation
      args: amh-eng
    dataset:
      name: tatoeba-test-v2021-03-30
      type: tatoeba_mt
      args: amh-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 59.5
    - name: chr-F
      type: chrf
      value: 0.70202
  - task:
      name: Translation ara-deu
      type: translation
      args: ara-deu
    dataset:
      name: tatoeba-test-v2021-08-07
      type: tatoeba_mt
      args: ara-deu
    metrics:
    - name: BLEU
      type: bleu
      value: 41.7
    - name: chr-F
      type: chrf
      value: 0.61039
  - task:
      name: Translation ara-eng
      type: translation
      args: ara-eng
    dataset:
      name: tatoeba-test-v2020-07-28
      type: tatoeba_mt
      args: ara-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 42.8
    - name: chr-F
      type: chrf
      value: 0.60134
  - task:
      name: Translation ara-fra
      type: translation
      args: ara-fra
    dataset:
      name: tatoeba-test-v2021-08-07
      type: tatoeba_mt
      args: ara-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 38.8
    - name: chr-F
      type: chrf
      value: 0.56120
  - task:
      name: Translation ara-spa
      type: translation
      args: ara-spa
    dataset:
      name: tatoeba-test-v2021-08-07
      type: tatoeba_mt
      args: ara-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 43.7
    - name: chr-F
      type: chrf
      value: 0.62567
  - task:
      name: Translation heb-deu
      type: translation
      args: heb-deu
    dataset:
      name: tatoeba-test-v2021-08-07
      type: tatoeba_mt
      args: heb-deu
    metrics:
    - name: BLEU
      type: bleu
      value: 42.4
    - name: chr-F
      type: chrf
      value: 0.63131
  - task:
      name: Translation heb-eng
      type: translation
      args: heb-eng
    dataset:
      name: tatoeba-test-v2021-08-07
      type: tatoeba_mt
      args: heb-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 49.2
    - name: chr-F
      type: chrf
      value: 0.64960
  - task:
      name: Translation heb-fra
      type: translation
      args: heb-fra
    dataset:
      name: tatoeba-test-v2021-08-07
      type: tatoeba_mt
      args: heb-fra
    metrics:
    - name: BLEU
      type: bleu
      value: 46.3
    - name: chr-F
      type: chrf
      value: 0.64348
  - task:
      name: Translation heb-por
      type: translation
      args: heb-por
    dataset:
      name: tatoeba-test-v2021-08-07
      type: tatoeba_mt
      args: heb-por
    metrics:
    - name: BLEU
      type: bleu
      value: 43.2
    - name: chr-F
      type: chrf
      value: 0.63350
  - task:
      name: Translation kab-eng
      type: translation
      args: kab-eng
    dataset:
      name: tatoeba-test-v2021-03-30
      type: tatoeba_mt
      args: kab-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 27.8
    - name: chr-F
      type: chrf
      value: 0.44646
  - task:
      name: Translation kab-spa
      type: translation
      args: kab-spa
    dataset:
      name: tatoeba-test-v2020-07-28
      type: tatoeba_mt
      args: kab-spa
    metrics:
    - name: BLEU
      type: bleu
      value: 23.4
    - name: chr-F
      type: chrf
      value: 0.41526
  - task:
      name: Translation mlt-eng
      type: translation
      args: mlt-eng
    dataset:
      name: tatoeba-test-v2021-08-07
      type: tatoeba_mt
      args: mlt-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 51.0
    - name: chr-F
      type: chrf
      value: 0.66653
  - task:
      name: Translation multi-multi
      type: translation
      args: multi-multi
    dataset:
      name: tatoeba-test-v2020-07-28-v2023-09-26
      type: tatoeba_mt
      args: multi-multi
    metrics:
    - name: BLEU
      type: bleu
      value: 34.0
    - name: chr-F
      type: chrf
      value: 0.51369
  - task:
      name: Translation hau-eng
      type: translation
      args: hau-eng
    dataset:
      name: newstest2021
      type: wmt-2021-news
      args: hau-eng
    metrics:
    - name: BLEU
      type: bleu
      value: 15.5
    - name: chr-F
      type: chrf
      value: 0.43744
---
# opus-mt-tc-bible-big-afa-deu_eng_fra_por_spa

## Table of Contents
- [Model Details](#model-details)
- [Uses](#uses)
- [Risks, Limitations and Biases](#risks-limitations-and-biases)
- [How to Get Started With the Model](#how-to-get-started-with-the-model)
- [Training](#training)
- [Evaluation](#evaluation)
- [Citation Information](#citation-information)
- [Acknowledgements](#acknowledgements)

## Model Details

Neural machine translation model for translating from Afro-Asiatic languages (afa) to German, English, French, Portuguese and Spanish (deu+eng+fra+por+spa).

This model is part of the [OPUS-MT project](https://github.com/Helsinki-NLP/Opus-MT), an effort to make neural machine translation models widely available and accessible for many languages in the world. All models were originally trained with [Marian NMT](https://marian-nmt.github.io/), an efficient NMT implementation written in pure C++, and then converted to PyTorch using the Hugging Face transformers library. Training data is taken from [OPUS](https://opus.nlpl.eu/) and training pipelines follow the procedures of [OPUS-MT-train](https://github.com/Helsinki-NLP/Opus-MT-train).

**Model Description:**
- **Developed by:** Language Technology Research Group at the University of Helsinki
- **Model Type:** Translation (transformer-big)
- **Release:** 2024-05-29
- **License:** Apache-2.0
- **Language(s):**
  - Source Language(s): aar acm afb amh apc ara arc arq arz bcw byn cop daa dsh gde gnd hau hbo heb hig irk jpa kab ker kqp ktb kxc lln lme meq mfh mfi mfk mif mlt mpg mqb muy oar orm pbi phn rif sgw shi shy som sur syc syr taq thv tig tir tmc tmh tmr ttr tzm wal xed zgh
  - Target Language(s): deu eng fra por spa
  - Valid Target Language Labels: >>deu<< >>eng<< >>fra<< >>por<< >>spa<< >>xxx<<
- **Original Model:** [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/afa-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.zip)
- **Resources for more information:**
  - [OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/afa-deu%2Beng%2Bfra%2Bpor%2Bspa/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-29)
  - [OPUS-MT-train GitHub repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
  - [More information about MarianNMT models in the transformers library](https://huggingface.co/docs/transformers/model_doc/marian)
  - [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge/)
  - [HPLT bilingual data v1 (as part of the Tatoeba Translation Challenge dataset)](https://hplt-project.org/datasets/v1)
  - [A massively parallel Bible corpus](https://aclanthology.org/L14-1215/)

This is a multilingual translation model with multiple target languages. A sentence-initial language token is required in the form `>>id<<` (id = valid target language ID), e.g. `>>deu<<`.

## Uses

This model can be used for translation and text-to-text generation.

## Risks, Limitations and Biases

**CONTENT WARNING: Readers should be aware that the model is trained on various public data sets that may contain content that is disturbing, offensive, and can propagate historical and current stereotypes.**

Significant research has explored bias and fairness issues with language models (see, e.g., [Sheng et al. (2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)).

## How to Get Started With the Model

A short code example:

```python
from transformers import MarianMTModel, MarianTokenizer

# the >>id<< prefix in each input selects the target language
src_text = [
    ">>eng<< Anta i ak-d-yennan ur yerbiḥ ara Tom?",
    ">>fra<< Iselman d aɣbalu axatar i wučči n yemdanen.",
]

model_name = "Helsinki-NLP/opus-mt-tc-bible-big-afa-deu_eng_fra_por_spa"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

translated = model.generate(**tokenizer(src_text, return_tensors="pt", padding=True))

for t in translated:
    print(tokenizer.decode(t, skip_special_tokens=True))

# expected output:
#   Who told you that he didn't?
#   L'eau est une source importante de nourriture pour les gens.
```

You can also use OPUS-MT models with the transformers pipelines, for example:

```python
from transformers import pipeline

pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-bible-big-afa-deu_eng_fra_por_spa")
print(pipe(">>eng<< Anta i ak-d-yennan ur yerbiḥ ara Tom?"))

# expected output: Who told you that he didn't?
```
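
Because the target language is selected only by the sentence-initial token, the same source sentence can be fanned out to all five supported targets with the same pipeline. A minimal sketch (reusing the Kabyle example sentence from above):

```python
from transformers import pipeline

pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-bible-big-afa-deu_eng_fra_por_spa")

# one source sentence, once per supported target language
sentence = "Anta i ak-d-yennan ur yerbiḥ ara Tom?"
for tgt in ("deu", "eng", "fra", "por", "spa"):
    result = pipe(f">>{tgt}<< {sentence}")
    print(tgt, result[0]["translation_text"])
```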

## Training

- **Data**: opusTCv20230926max50+bt+jhubc ([source](https://github.com/Helsinki-NLP/Tatoeba-Challenge))
- **Pre-processing**: SentencePiece (spm32k,spm32k); see the segmentation sketch below
- **Model Type:** transformer-big
- **Original MarianNMT Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/afa-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.zip)
- **Training Scripts**: [GitHub repo](https://github.com/Helsinki-NLP/OPUS-MT-train)

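The `spm32k,spm32k` entry means that source and target text are each segmented with a 32k-piece SentencePiece model; the `source.spm` and `target.spm` files added in this commit are those models. A minimal sketch (assuming the `sentencepiece` Python package and a local checkout of this repository) to inspect the segmentation:

```python
import sentencepiece as spm

# source.spm is the source-side segmentation model shipped in this repository
sp = spm.SentencePieceProcessor(model_file="source.spm")

print(sp.vocab_size())  # roughly 32k pieces (spm32k)
print(sp.encode("Iselman d aɣbalu axatar i wučči n yemdanen.", out_type=str))
```
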
## Evaluation

* [Model scores at the OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/afa-deu%2Beng%2Bfra%2Bpor%2Bspa/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-29)
* test set translations: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/afa-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt)
* test set scores: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/afa-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt)
* benchmark results: [benchmark_results.txt](benchmark_results.txt)
* benchmark output: [benchmark_translations.zip](benchmark_translations.zip)

| langpair | testset | chr-F | BLEU | #sent | #words |
|----------|---------|-------|------|-------|--------|
| ara-deu | tatoeba-test-v2021-08-07 | 0.61039 | 41.7 | 1209 | 8371 |
| ara-eng | tatoeba-test-v2021-08-07 | 5.430 | 0.0 | 10305 | 76975 |
| ara-fra | tatoeba-test-v2021-08-07 | 0.56120 | 38.8 | 1569 | 11066 |
| ara-spa | tatoeba-test-v2021-08-07 | 0.62567 | 43.7 | 1511 | 9708 |
| heb-deu | tatoeba-test-v2021-08-07 | 0.63131 | 42.4 | 3090 | 25101 |
| heb-eng | tatoeba-test-v2021-08-07 | 0.64960 | 49.2 | 10519 | 77427 |
| heb-fra | tatoeba-test-v2021-08-07 | 0.64348 | 46.3 | 3281 | 26123 |
| heb-por | tatoeba-test-v2021-08-07 | 0.63350 | 43.2 | 719 | 5335 |
| mlt-eng | tatoeba-test-v2021-08-07 | 0.66653 | 51.0 | 203 | 1165 |
| amh-eng | flores101-devtest | 0.47357 | 21.0 | 1012 | 24721 |
| amh-fra | flores101-devtest | 0.43155 | 16.2 | 1012 | 28343 |
| amh-por | flores101-devtest | 0.42109 | 15.1 | 1012 | 26519 |
| ara-deu | flores101-devtest | 0.51110 | 20.4 | 1012 | 25094 |
| ara-fra | flores101-devtest | 0.56934 | 29.7 | 1012 | 28343 |
| ara-por | flores101-devtest | 0.55727 | 28.2 | 1012 | 26519 |
| ara-spa | flores101-devtest | 0.48350 | 19.5 | 1012 | 29199 |
| hau-eng | flores101-devtest | 0.46804 | 21.6 | 1012 | 24721 |
| hau-fra | flores101-devtest | 0.41827 | 15.9 | 1012 | 28343 |
| heb-eng | flores101-devtest | 0.62422 | 36.6 | 1012 | 24721 |
| mlt-eng | flores101-devtest | 0.72390 | 49.1 | 1012 | 24721 |
| mlt-fra | flores101-devtest | 0.60840 | 34.7 | 1012 | 28343 |
| mlt-por | flores101-devtest | 0.59863 | 31.8 | 1012 | 26519 |
| acm-deu | flores200-devtest | 0.48947 | 17.6 | 1012 | 25094 |
| acm-eng | flores200-devtest | 0.56799 | 28.5 | 1012 | 24721 |
| acm-fra | flores200-devtest | 0.53577 | 26.1 | 1012 | 28343 |
| acm-por | flores200-devtest | 0.52441 | 23.9 | 1012 | 26519 |
| acm-spa | flores200-devtest | 0.46985 | 18.2 | 1012 | 29199 |
| amh-deu | flores200-devtest | 0.41553 | 12.6 | 1012 | 25094 |
| amh-eng | flores200-devtest | 0.49333 | 22.5 | 1012 | 24721 |
| amh-fra | flores200-devtest | 0.44890 | 17.8 | 1012 | 28343 |
| amh-por | flores200-devtest | 0.43771 | 16.5 | 1012 | 26519 |
| apc-deu | flores200-devtest | 0.47480 | 16.0 | 1012 | 25094 |
| apc-eng | flores200-devtest | 0.56075 | 28.1 | 1012 | 24721 |
| apc-fra | flores200-devtest | 0.52325 | 24.6 | 1012 | 28343 |
| apc-por | flores200-devtest | 0.51055 | 22.9 | 1012 | 26519 |
| apc-spa | flores200-devtest | 0.45634 | 17.2 | 1012 | 29199 |
| arz-deu | flores200-devtest | 0.45844 | 14.1 | 1012 | 25094 |
| arz-eng | flores200-devtest | 0.52534 | 22.7 | 1012 | 24721 |
| arz-fra | flores200-devtest | 0.50336 | 21.8 | 1012 | 28343 |
| arz-por | flores200-devtest | 0.48741 | 20.0 | 1012 | 26519 |
| arz-spa | flores200-devtest | 0.44516 | 15.8 | 1012 | 29199 |
| hau-eng | flores200-devtest | 0.48137 | 23.4 | 1012 | 24721 |
| hau-fra | flores200-devtest | 0.42981 | 17.2 | 1012 | 28343 |
| hau-por | flores200-devtest | 0.41385 | 15.7 | 1012 | 26519 |
| heb-deu | flores200-devtest | 0.53482 | 22.8 | 1012 | 25094 |
| heb-eng | flores200-devtest | 0.63368 | 38.0 | 1012 | 24721 |
| heb-fra | flores200-devtest | 0.58417 | 32.6 | 1012 | 28343 |
| heb-por | flores200-devtest | 0.57140 | 30.7 | 1012 | 26519 |
| mlt-eng | flores200-devtest | 0.73415 | 51.1 | 1012 | 24721 |
| mlt-fra | flores200-devtest | 0.61626 | 35.8 | 1012 | 28343 |
| mlt-spa | flores200-devtest | 0.50534 | 21.8 | 1012 | 29199 |
| som-eng | flores200-devtest | 0.42764 | 17.7 | 1012 | 24721 |
| tir-por | flores200-devtest | 2.931 | 0.0 | 1012 | 26519 |
| hau-eng | newstest2021 | 0.43744 | 15.5 | 997 | 27372 |
| amh-eng | ntrex128 | 0.42042 | 15.0 | 1997 | 47673 |
| hau-eng | ntrex128 | 0.50349 | 26.1 | 1997 | 47673 |
| hau-fra | ntrex128 | 0.41837 | 15.8 | 1997 | 53481 |
| hau-por | ntrex128 | 0.40851 | 15.3 | 1997 | 51631 |
| hau-spa | ntrex128 | 0.43376 | 18.5 | 1997 | 54107 |
| heb-deu | ntrex128 | 0.49482 | 17.7 | 1997 | 48761 |
| heb-eng | ntrex128 | 0.59241 | 31.3 | 1997 | 47673 |
| heb-fra | ntrex128 | 0.52180 | 24.0 | 1997 | 53481 |
| heb-por | ntrex128 | 0.51248 | 23.2 | 1997 | 51631 |
| mlt-spa | ntrex128 | 0.57078 | 30.9 | 1997 | 54107 |
| som-eng | ntrex128 | 0.49187 | 24.3 | 1997 | 47673 |
| som-fra | ntrex128 | 0.41236 | 15.1 | 1997 | 53481 |
| som-por | ntrex128 | 0.41550 | 15.2 | 1997 | 51631 |
| som-spa | ntrex128 | 0.43278 | 17.6 | 1997 | 54107 |
| tir-eng | tico19-test | 2.655 | 0.0 | 2100 | 56824 |

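The chr-F and BLEU columns are corpus-level scores over the linked test-set translations. A minimal sketch (assuming the `sacrebleu` package, with hypothetical example strings) of how such scores are computed; note that the card does not state the exact scorer configuration, and sacrebleu's CHRF default is chrF2:

```python
from sacrebleu.metrics import BLEU, CHRF

# hypothetical system outputs and reference translations
hypotheses = ["Who told you that he didn't?"]
references = [["Who told you that he did not?"]]  # one reference stream

print(BLEU().corpus_score(hypotheses, references))
print(CHRF().corpus_score(hypotheses, references))
```
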
## Citation Information

* Publications: [Democratizing neural machine translation with OPUS-MT](https://doi.org/10.1007/s10579-023-09704-w), [OPUS-MT – Building open translation services for the World](https://aclanthology.org/2020.eamt-1.61/) and [The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual MT](https://aclanthology.org/2020.wmt-1.139/) (please cite if you use this model)

```bibtex
@article{tiedemann2023democratizing,
  title={Democratizing neural machine translation with {OPUS-MT}},
  author={Tiedemann, J{\"o}rg and Aulamo, Mikko and Bakshandaeva, Daria and Boggia, Michele and Gr{\"o}nroos, Stig-Arne and Nieminen, Tommi and Raganato, Alessandro and Scherrer, Yves and Vazquez, Raul and Virpioja, Sami},
  journal={Language Resources and Evaluation},
  number={58},
  pages={713--755},
  year={2023},
  publisher={Springer Nature},
  issn={1574-0218},
  doi={10.1007/s10579-023-09704-w}
}

@inproceedings{tiedemann-thottingal-2020-opus,
  title = "{OPUS}-{MT} {--} Building open translation services for the World",
  author = {Tiedemann, J{\"o}rg and Thottingal, Santhosh},
  booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
  month = nov,
  year = "2020",
  address = "Lisboa, Portugal",
  publisher = "European Association for Machine Translation",
  url = "https://aclanthology.org/2020.eamt-1.61",
  pages = "479--480",
}

@inproceedings{tiedemann-2020-tatoeba,
  title = "The Tatoeba Translation Challenge {--} Realistic Data Sets for Low Resource and Multilingual {MT}",
  author = {Tiedemann, J{\"o}rg},
  booktitle = "Proceedings of the Fifth Conference on Machine Translation",
  month = nov,
  year = "2020",
  address = "Online",
  publisher = "Association for Computational Linguistics",
  url = "https://aclanthology.org/2020.wmt-1.139",
  pages = "1174--1182",
}
```

## Acknowledgements

The work is supported by the [HPLT project](https://hplt-project.org/), funded by the European Union’s Horizon Europe research and innovation programme under grant agreement No 101070350. We are also grateful for the generous computational resources and IT infrastructure provided by [CSC -- IT Center for Science](https://www.csc.fi/), Finland, and the [EuroHPC supercomputer LUMI](https://www.lumi-supercomputer.eu/).

## Model conversion info

* transformers version: 4.45.1
* OPUS-MT git hash: a0ea3b3
* port time: Mon Oct 7 16:01:48 EEST 2024
* port machine: LM0-400-22516.local
benchmark_results.txt ADDED
@@ -0,0 +1,129 @@
multi-multi	tatoeba-test-v2020-07-28-v2023-09-26	0.51369	34.0	10000	74513
amh-deu	flores101-devtest	0.39890	11.5	1012	25094
amh-eng	flores101-devtest	0.47357	21.0	1012	24721
amh-fra	flores101-devtest	0.43155	16.2	1012	28343
amh-por	flores101-devtest	0.42109	15.1	1012	26519
amh-spa	flores101-devtest	0.38003	11.5	1012	29199
ara-deu	flores101-devtest	0.51110	20.4	1012	25094
ara-fra	flores101-devtest	0.56934	29.7	1012	28343
ara-por	flores101-devtest	0.55727	28.2	1012	26519
ara-spa	flores101-devtest	0.48350	19.5	1012	29199
hau-deu	flores101-devtest	0.30486	6.3	1012	25094
hau-eng	flores101-devtest	0.46804	21.6	1012	24721
hau-fra	flores101-devtest	0.41827	15.9	1012	28343
hau-spa	flores101-devtest	0.37201	11.5	1012	29199
heb-eng	flores101-devtest	0.62422	36.6	1012	24721
mlt-eng	flores101-devtest	0.72390	49.1	1012	24721
mlt-fra	flores101-devtest	0.60840	34.7	1012	28343
mlt-por	flores101-devtest	0.59863	31.8	1012	26519
orm-deu	flores101-devtest	0.28280	4.1	1012	25094
orm-fra	flores101-devtest	0.28443	6.1	1012	28343
orm-por	flores101-devtest	0.28741	5.9	1012	26519
orm-spa	flores101-devtest	0.27579	5.3	1012	29199
acm-deu	flores200-devtest	0.48947	17.6	1012	25094
acm-eng	flores200-devtest	0.56799	28.5	1012	24721
acm-fra	flores200-devtest	0.53577	26.1	1012	28343
acm-por	flores200-devtest	0.52441	23.9	1012	26519
acm-spa	flores200-devtest	0.46985	18.2	1012	29199
amh-deu	flores200-devtest	0.41553	12.6	1012	25094
amh-eng	flores200-devtest	0.49333	22.5	1012	24721
amh-fra	flores200-devtest	0.44890	17.8	1012	28343
amh-por	flores200-devtest	0.43771	16.5	1012	26519
amh-spa	flores200-devtest	0.39526	12.8	1012	29199
apc-deu	flores200-devtest	0.47480	16.0	1012	25094
apc-eng	flores200-devtest	0.56075	28.1	1012	24721
apc-fra	flores200-devtest	0.52325	24.6	1012	28343
apc-por	flores200-devtest	0.51055	22.9	1012	26519
apc-spa	flores200-devtest	0.45634	17.2	1012	29199
ara-deu	flores200-devtest	4.103	0.9	1	21
ara-eng	flores200-devtest	4.219	0.9	1	21
ara-fra	flores200-devtest	3.444	0.6	1	24
ara-por	flores200-devtest	4.490	0.8	1	22
ara-spa	flores200-devtest	0.16385	0.0	1	23
arz-deu	flores200-devtest	0.45844	14.1	1012	25094
arz-eng	flores200-devtest	0.52534	22.7	1012	24721
arz-fra	flores200-devtest	0.50336	21.8	1012	28343
arz-por	flores200-devtest	0.48741	20.0	1012	26519
arz-spa	flores200-devtest	0.44516	15.8	1012	29199
hau-deu	flores200-devtest	0.31338	6.9	1012	25094
hau-eng	flores200-devtest	0.48137	23.4	1012	24721
hau-fra	flores200-devtest	0.42981	17.2	1012	28343
hau-por	flores200-devtest	0.41385	15.7	1012	26519
hau-spa	flores200-devtest	0.37800	11.7	1012	29199
heb-deu	flores200-devtest	0.53482	22.8	1012	25094
heb-eng	flores200-devtest	0.63368	38.0	1012	24721
heb-fra	flores200-devtest	0.58417	32.6	1012	28343
heb-por	flores200-devtest	0.57140	30.7	1012	26519
kab-deu	flores200-devtest	0.29507	5.1	1012	25094
kab-eng	flores200-devtest	0.32121	10.0	1012	24721
kab-por	flores200-devtest	0.32076	9.0	1012	26519
mlt-eng	flores200-devtest	0.73415	51.1	1012	24721
mlt-fra	flores200-devtest	0.61626	35.8	1012	28343
mlt-spa	flores200-devtest	0.50534	21.8	1012	29199
som-deu	flores200-devtest	0.37017	9.7	1012	25094
som-eng	flores200-devtest	0.42764	17.7	1012	24721
som-fra	flores200-devtest	0.39536	14.4	1012	28343
som-por	flores200-devtest	0.39440	14.1	1012	26519
som-spa	flores200-devtest	0.35321	10.0	1012	29199
tir-deu	flores200-devtest	0.33956	6.5	1012	25094
tir-eng	flores200-devtest	0.37119	11.6	1012	24721
tir-fra	flores200-devtest	0.35347	9.7	1012	28343
tir-por	flores200-devtest	2.931	0.0	1012	26519
tir-spa	flores200-devtest	0.32859	7.6	1012	29199
hau-eng	newstest2021	0.43744	15.5	997	27372
amh-deu	ntrex128	0.35473	7.8	1997	48761
amh-eng	ntrex128	0.42042	15.0	1997	47673
amh-fra	ntrex128	0.37274	10.8	1997	53481
amh-por	ntrex128	0.36420	9.8	1997	51631
amh-spa	ntrex128	0.38306	12.1	1997	54107
hau-deu	ntrex128	0.31360	6.6	1997	48761
hau-eng	ntrex128	0.50349	26.1	1997	47673
hau-fra	ntrex128	0.41837	15.8	1997	53481
hau-por	ntrex128	0.40851	15.3	1997	51631
hau-spa	ntrex128	0.43376	18.5	1997	54107
heb-deu	ntrex128	0.49482	17.7	1997	48761
heb-eng	ntrex128	0.59241	31.3	1997	47673
heb-fra	ntrex128	0.52180	24.0	1997	53481
heb-por	ntrex128	0.51248	23.2	1997	51631
mlt-spa	ntrex128	0.57078	30.9	1997	54107
orm-deu	ntrex128	0.24414	3.7	1997	48761
orm-eng	ntrex128	0.23820	4.7	1997	47673
orm-por	ntrex128	0.24063	4.0	1997	51631
shi-deu	ntrex128	0.20534	1.0	1997	48761
shi-eng	ntrex128	0.22850	1.8	1997	47673
shi-fra	ntrex128	0.22309	2.4	1997	53481
shi-por	ntrex128	0.21646	1.9	1997	51631
shi-spa	ntrex128	0.23162	2.8	1997	54107
som-deu	ntrex128	0.39394	11.0	1997	48761
som-eng	ntrex128	0.49187	24.3	1997	47673
som-fra	ntrex128	0.41236	15.1	1997	53481
som-por	ntrex128	0.41550	15.2	1997	51631
som-spa	ntrex128	0.43278	17.6	1997	54107
tir-deu	ntrex128	0.32512	6.2	1997	48761
tir-eng	ntrex128	0.37185	12.5	1997	47673
tir-fra	ntrex128	0.32879	8.6	1997	53481
tir-por	ntrex128	0.33090	8.5	1997	51631
tir-spa	ntrex128	0.34985	10.2	1997	54107
ara-eng	tatoeba-test-v2020-07-28	0.60134	42.8	10000	73977
arq-eng	tatoeba-test-v2020-07-28	0.23786	6.6	403	3059
heb-eng	tatoeba-test-v2020-07-28	0.63689	47.7	10000	73560
kab-spa	tatoeba-test-v2020-07-28	0.41526	23.4	212	1333
amh-eng	tatoeba-test-v2021-03-30	0.70202	59.5	202	1055
arq-eng	tatoeba-test-v2021-03-30	0.23812	6.7	405	3075
kab-deu	tatoeba-test-v2021-03-30	0.35013	15.0	376	2425
kab-eng	tatoeba-test-v2021-03-30	0.44646	27.8	11692	73688
kab-fra	tatoeba-test-v2021-03-30	0.35443	16.8	11158	89678
mlt-eng	tatoeba-test-v2021-03-30	0.66333	50.6	206	1183
ara-deu	tatoeba-test-v2021-08-07	0.61039	41.7	1209	8371
ara-eng	tatoeba-test-v2021-08-07	5.430	0.0	10305	76975
ara-fra	tatoeba-test-v2021-08-07	0.56120	38.8	1569	11066
ara-spa	tatoeba-test-v2021-08-07	0.62567	43.7	1511	9708
arq-eng	tatoeba-test-v2021-08-07	0.24532	7.0	405	3075
heb-deu	tatoeba-test-v2021-08-07	0.63131	42.4	3090	25101
heb-eng	tatoeba-test-v2021-08-07	0.64960	49.2	10519	77427
heb-fra	tatoeba-test-v2021-08-07	0.64348	46.3	3281	26123
heb-por	tatoeba-test-v2021-08-07	0.63350	43.2	719	5335
kab-spa	tatoeba-test-v2021-08-07	0.36828	18.2	883	6705
mlt-eng	tatoeba-test-v2021-08-07	0.66653	51.0	203	1165
ara-spa	tico19-test	0.23860	1.0	2100	66563
tir-eng	tico19-test	2.655	0.0	2100	56824
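
Each row above has the same six whitespace-separated columns as the README table: langpair, testset, chr-F, BLEU, #sent, #words. A minimal sketch for loading the file:

```python
# parse benchmark_results.txt into (langpair, testset, chrf, bleu, sents, words) tuples
rows = []
with open("benchmark_results.txt") as f:
    for line in f:
        langpair, testset, chrf, bleu, nsent, nwords = line.split()
        rows.append((langpair, testset, float(chrf), float(bleu), int(nsent), int(nwords)))

# e.g. the five strongest pairs by chr-F
for r in sorted(rows, key=lambda r: r[2], reverse=True)[:5]:
    print(r)
```
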
benchmark_translations.zip ADDED
File without changes
config.json ADDED
@@ -0,0 +1,41 @@
{
  "_name_or_path": "pytorch-models/opus-mt-tc-bible-big-afa-deu_eng_fra_por_spa",
  "activation_dropout": 0.0,
  "activation_function": "relu",
  "architectures": [
    "MarianMTModel"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 61447,
  "decoder_vocab_size": 61448,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 406,
  "forced_eos_token_id": null,
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "max_length": null,
  "max_position_embeddings": 1024,
  "model_type": "marian",
  "normalize_embedding": false,
  "num_beams": null,
  "num_hidden_layers": 6,
  "pad_token_id": 61447,
  "scale_embedding": true,
  "share_encoder_decoder_embeddings": true,
  "static_position_embeddings": true,
  "torch_dtype": "float32",
  "transformers_version": "4.45.1",
  "use_cache": true,
  "vocab_size": 61448
}
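
The configuration describes a standard Marian transformer-big: 6+6 layers, 16 attention heads, d_model 1024, and a shared 61,448-entry vocabulary in which `<pad>` (id 61447) doubles as the decoder start token. A minimal sketch (assuming the `transformers` package) reading those fields back:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("Helsinki-NLP/opus-mt-tc-bible-big-afa-deu_eng_fra_por_spa")

assert config.model_type == "marian"
print(config.encoder_layers, config.decoder_layers)    # 6 6
print(config.d_model, config.encoder_attention_heads)  # 1024 16
print(config.vocab_size)                               # 61448 (shared source/target)
print(config.pad_token_id == config.decoder_start_token_id)  # True
```
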
generation_config.json ADDED
@@ -0,0 +1,16 @@
{
  "_from_model_config": true,
  "bad_words_ids": [
    [
      61447
    ]
  ],
  "bos_token_id": 0,
  "decoder_start_token_id": 61447,
  "eos_token_id": 406,
  "forced_eos_token_id": 406,
  "max_length": 512,
  "num_beams": 4,
  "pad_token_id": 61447,
  "transformers_version": "4.45.1"
}
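
These are the defaults picked up by `generate()`: beam search with 4 beams, outputs capped at 512 tokens, and the `<pad>` id (61447) banned from being produced via `bad_words_ids`. A minimal sketch showing that any of them can be overridden per call:

```python
from transformers import MarianMTModel, MarianTokenizer

model_name = "Helsinki-NLP/opus-mt-tc-bible-big-afa-deu_eng_fra_por_spa"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

batch = tokenizer([">>spa<< Iselman d aɣbalu axatar i wučči n yemdanen."],
                  return_tensors="pt", padding=True)

# num_beams=4 and max_length=512 come from generation_config.json;
# keyword arguments to generate() take precedence:
out = model.generate(**batch, num_beams=8, max_length=128)
print(tokenizer.batch_decode(out, skip_special_tokens=True))
```
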
model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eae21f9f138f8296ba401a07f5d6bbf93ed8717e549ebddbf8ec4af701b7277e
size 957395920
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:574d19800a181a2f7b5816203b20e5b9d2610ff1298761c9eec16d80852be20e
size 957447173
source.spm ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:15ae3e79502defa35938861792ef2e0a159e2492926e83abb71aba173620c3ed
size 806005
special_tokens_map.json ADDED
@@ -0,0 +1 @@
{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
target.spm ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bdcc6b68a87bf83097afc16df9e5f63b005f2dd7bb6262d4b98a2ffe365697b7
size 812456
tokenizer_config.json ADDED
@@ -0,0 +1 @@
{
  "source_lang": "afa",
  "target_lang": "deu+eng+fra+por+spa",
  "unk_token": "<unk>",
  "eos_token": "</s>",
  "pad_token": "<pad>",
  "model_max_length": 512,
  "sp_model_kwargs": {},
  "separate_vocabs": false,
  "special_tokens_map_file": null,
  "name_or_path": "marian-models/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29/afa-deu+eng+fra+por+spa",
  "tokenizer_class": "MarianTokenizer"
}
vocab.json ADDED
The diff for this file is too large to render.