tiedeman's picture
Initial commit
68d38d6
|
raw
history blame
39.1 kB
---
library_name: transformers
language:
- abi
- acd
- ade
- adj
- ak
- akp
- ann
- anv
- atg
- avn
- bas
- bav
- bba
- beh
- bem
- bfd
- bfo
- bim
- biv
- bkv
- blh
- bmq
- bmv
- bom
- bov
- box
- bqj
- bss
- btt
- bud
- bwu
- cce
- cjk
- cko
- cme
- csk
- cwe
- cwt
- dag
- de
- dga
- dgi
- dig
- dop
- dug
- dyi
- dyo
- ee
- efi
- en
- es
- fal
- ff
- fon
- fr
- gej
- gkn
- gng
- gog
- gud
- gur
- guw
- gux
- gwr
- hag
- hay
- heh
- hz
- ife
- ig
- iri
- izr
- jbu
- jmc
- kam
- kbp
- kdc
- kdl
- kdn
- ken
- keu
- kez
- kg
- ki
- kia
- kj
- kki
- kkj
- kma
- kmb
- ksb
- ktj
- kub
- kus
- kyf
- las
- lee
- lef
- lem
- lg
- lia
- lip
- ln
- lob
- lon
- lua
- luy
- maw
- mcp
- mcu
- mda
- mfq
- mgo
- mnf
- mnh
- mor
- mos
- muh
- myk
- myx
- mzk
- mzm
- mzw
- ncu
- nd
- ndz
- nfr
- ng
- nhu
- nim
- nin
- nmz
- nnb
- nnh
- nnw
- nr
- nso
- ntm
- ntr
- nuj
- nwb
- ny
- nyf
- nyn
- nyo
- nyy
- nzi
- oku
- old
- ozm
- pai
- pbl
- pkb
- pt
- rim
- rn
- rw
- seh
- sg
- sig
- sil
- sld
- sn
- snw
- soy
- spp
- ss
- st
- suk
- sw
- sxb
- tbz
- tem
- thk
- tik
- tlj
- tn
- toh
- toi
- tpm
- ts
- tsw
- tum
- tw
- umb
- vag
- ve
- vmw
- vun
- wmw
- wo
- wob
- xh
- xog
- xon
- xrb
- xsm
- xuo
- yam
- yaz
- yo
- zu
tags:
- translation
- opus-mt-tc-bible
license: apache-2.0
model-index:
- name: opus-mt-tc-bible-big-alv-deu_eng_fra_por_spa
results:
- task:
name: Translation bem-eng
type: translation
args: bem-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: bem-eng
metrics:
- name: BLEU
type: bleu
value: 13.1
- name: chr-F
type: chrf
value: 0.37071
- task:
name: Translation ibo-eng
type: translation
args: ibo-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: ibo-eng
metrics:
- name: BLEU
type: bleu
value: 14.6
- name: chr-F
type: chrf
value: 0.38994
- task:
name: Translation kin-eng
type: translation
args: kin-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: kin-eng
metrics:
- name: BLEU
type: bleu
value: 18.1
- name: chr-F
type: chrf
value: 0.41964
- task:
name: Translation kin-fra
type: translation
args: kin-fra
dataset:
name: flores200-devtest
type: flores200-devtest
args: kin-fra
metrics:
- name: BLEU
type: bleu
value: 10.7
- name: chr-F
type: chrf
value: 0.34887
- task:
name: Translation kon-eng
type: translation
args: kon-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: kon-eng
metrics:
- name: BLEU
type: bleu
value: 11.3
- name: chr-F
type: chrf
value: 0.34262
- task:
name: Translation lin-eng
type: translation
args: lin-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: lin-eng
metrics:
- name: BLEU
type: bleu
value: 14.2
- name: chr-F
type: chrf
value: 0.37728
- task:
name: Translation lin-fra
type: translation
args: lin-fra
dataset:
name: flores200-devtest
type: flores200-devtest
args: lin-fra
metrics:
- name: BLEU
type: bleu
value: 11.0
- name: chr-F
type: chrf
value: 0.35052
- task:
name: Translation lug-eng
type: translation
args: lug-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: lug-eng
metrics:
- name: BLEU
type: bleu
value: 10.6
- name: chr-F
type: chrf
value: 0.31805
- task:
name: Translation nso-eng
type: translation
args: nso-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: nso-eng
metrics:
- name: BLEU
type: bleu
value: 22.3
- name: chr-F
type: chrf
value: 0.45662
- task:
name: Translation nso-fra
type: translation
args: nso-fra
dataset:
name: flores200-devtest
type: flores200-devtest
args: nso-fra
metrics:
- name: BLEU
type: bleu
value: 10.5
- name: chr-F
type: chrf
value: 0.33732
- task:
name: Translation nya-eng
type: translation
args: nya-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: nya-eng
metrics:
- name: BLEU
type: bleu
value: 15.8
- name: chr-F
type: chrf
value: 0.39887
- task:
name: Translation run-eng
type: translation
args: run-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: run-eng
metrics:
- name: BLEU
type: bleu
value: 15.5
- name: chr-F
type: chrf
value: 0.39846
- task:
name: Translation run-fra
type: translation
args: run-fra
dataset:
name: flores200-devtest
type: flores200-devtest
args: run-fra
metrics:
- name: BLEU
type: bleu
value: 10.8
- name: chr-F
type: chrf
value: 0.34845
- task:
name: Translation run-por
type: translation
args: run-por
dataset:
name: flores200-devtest
type: flores200-devtest
args: run-por
metrics:
- name: BLEU
type: bleu
value: 10.2
- name: chr-F
type: chrf
value: 0.33836
- task:
name: Translation sna-eng
type: translation
args: sna-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: sna-eng
metrics:
- name: BLEU
type: bleu
value: 17.2
- name: chr-F
type: chrf
value: 0.41974
- task:
name: Translation sna-fra
type: translation
args: sna-fra
dataset:
name: flores200-devtest
type: flores200-devtest
args: sna-fra
metrics:
- name: BLEU
type: bleu
value: 11.3
- name: chr-F
type: chrf
value: 0.36443
- task:
name: Translation sna-por
type: translation
args: sna-por
dataset:
name: flores200-devtest
type: flores200-devtest
args: sna-por
metrics:
- name: BLEU
type: bleu
value: 10.2
- name: chr-F
type: chrf
value: 0.34260
- task:
name: Translation sot-eng
type: translation
args: sot-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: sot-eng
metrics:
- name: BLEU
type: bleu
value: 20.7
- name: chr-F
type: chrf
value: 0.45415
- task:
name: Translation sot-fra
type: translation
args: sot-fra
dataset:
name: flores200-devtest
type: flores200-devtest
args: sot-fra
metrics:
- name: BLEU
type: bleu
value: 10.7
- name: chr-F
type: chrf
value: 0.34608
- task:
name: Translation ssw-eng
type: translation
args: ssw-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: ssw-eng
metrics:
- name: BLEU
type: bleu
value: 16.1
- name: chr-F
type: chrf
value: 0.39768
- task:
name: Translation swh-deu
type: translation
args: swh-deu
dataset:
name: flores200-devtest
type: flores200-devtest
args: swh-deu
metrics:
- name: BLEU
type: bleu
value: 10.9
- name: chr-F
type: chrf
value: 0.38892
- task:
name: Translation swh-eng
type: translation
args: swh-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: swh-eng
metrics:
- name: BLEU
type: bleu
value: 29.1
- name: chr-F
type: chrf
value: 0.54048
- task:
name: Translation swh-fra
type: translation
args: swh-fra
dataset:
name: flores200-devtest
type: flores200-devtest
args: swh-fra
metrics:
- name: BLEU
type: bleu
value: 18.2
- name: chr-F
type: chrf
value: 0.44837
- task:
name: Translation swh-por
type: translation
args: swh-por
dataset:
name: flores200-devtest
type: flores200-devtest
args: swh-por
metrics:
- name: BLEU
type: bleu
value: 17.6
- name: chr-F
type: chrf
value: 0.44062
- task:
name: Translation swh-spa
type: translation
args: swh-spa
dataset:
name: flores200-devtest
type: flores200-devtest
args: swh-spa
metrics:
- name: BLEU
type: bleu
value: 11.6
- name: chr-F
type: chrf
value: 0.38855
- task:
name: Translation tsn-eng
type: translation
args: tsn-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: tsn-eng
metrics:
- name: BLEU
type: bleu
value: 15.3
- name: chr-F
type: chrf
value: 0.40410
- task:
name: Translation tsn-fra
type: translation
args: tsn-fra
dataset:
name: flores200-devtest
type: flores200-devtest
args: tsn-fra
metrics:
- name: BLEU
type: bleu
value: 10.3
- name: chr-F
type: chrf
value: 0.34284
- task:
name: Translation tso-eng
type: translation
args: tso-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: tso-eng
metrics:
- name: BLEU
type: bleu
value: 17.6
- name: chr-F
type: chrf
value: 0.41504
- task:
name: Translation tso-fra
type: translation
args: tso-fra
dataset:
name: flores200-devtest
type: flores200-devtest
args: tso-fra
metrics:
- name: BLEU
type: bleu
value: 10.1
- name: chr-F
type: chrf
value: 0.33502
- task:
name: Translation xho-eng
type: translation
args: xho-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: xho-eng
metrics:
- name: BLEU
type: bleu
value: 23.7
- name: chr-F
type: chrf
value: 0.47667
- task:
name: Translation xho-fra
type: translation
args: xho-fra
dataset:
name: flores200-devtest
type: flores200-devtest
args: xho-fra
metrics:
- name: BLEU
type: bleu
value: 14.1
- name: chr-F
type: chrf
value: 0.39392
- task:
name: Translation xho-por
type: translation
args: xho-por
dataset:
name: flores200-devtest
type: flores200-devtest
args: xho-por
metrics:
- name: BLEU
type: bleu
value: 12.3
- name: chr-F
type: chrf
value: 0.37032
- task:
name: Translation zul-eng
type: translation
args: zul-eng
dataset:
name: flores200-devtest
type: flores200-devtest
args: zul-eng
metrics:
- name: BLEU
type: bleu
value: 23.4
- name: chr-F
type: chrf
value: 0.47798
- task:
name: Translation zul-fra
type: translation
args: zul-fra
dataset:
name: flores200-devtest
type: flores200-devtest
args: zul-fra
metrics:
- name: BLEU
type: bleu
value: 13.9
- name: chr-F
type: chrf
value: 0.39504
- task:
name: Translation zul-por
type: translation
args: zul-por
dataset:
name: flores200-devtest
type: flores200-devtest
args: zul-por
metrics:
- name: BLEU
type: bleu
value: 12.3
- name: chr-F
type: chrf
value: 0.36947
- task:
name: Translation ibo-eng
type: translation
args: ibo-eng
dataset:
name: flores101-devtest
type: flores_101
args: ibo eng devtest
metrics:
- name: BLEU
type: bleu
value: 12.5
- name: chr-F
type: chrf
value: 0.36320
- task:
name: Translation nya-eng
type: translation
args: nya-eng
dataset:
name: flores101-devtest
type: flores_101
args: nya eng devtest
metrics:
- name: BLEU
type: bleu
value: 13.2
- name: chr-F
type: chrf
value: 0.36765
- task:
name: Translation swh-por
type: translation
args: swh-por
dataset:
name: flores101-devtest
type: flores_101
args: swh por devtest
metrics:
- name: BLEU
type: bleu
value: 14.7
- name: chr-F
type: chrf
value: 0.40847
- task:
name: Translation xho-por
type: translation
args: xho-por
dataset:
name: flores101-devtest
type: flores_101
args: xho por devtest
metrics:
- name: BLEU
type: bleu
value: 10.1
- name: chr-F
type: chrf
value: 0.33906
- task:
name: Translation zul-fra
type: translation
args: zul-fra
dataset:
name: flores101-devtest
type: flores_101
args: zul fra devtest
metrics:
- name: BLEU
type: bleu
value: 11.2
- name: chr-F
type: chrf
value: 0.35968
- task:
name: Translation bem-eng
type: translation
args: bem-eng
dataset:
name: ntrex128
type: ntrex128
args: bem-eng
metrics:
- name: BLEU
type: bleu
value: 14.9
- name: chr-F
type: chrf
value: 0.38068
- task:
name: Translation bem-spa
type: translation
args: bem-spa
dataset:
name: ntrex128
type: ntrex128
args: bem-spa
metrics:
- name: BLEU
type: bleu
value: 10.2
- name: chr-F
type: chrf
value: 0.33394
- task:
name: Translation ibo-eng
type: translation
args: ibo-eng
dataset:
name: ntrex128
type: ntrex128
args: ibo-eng
metrics:
- name: BLEU
type: bleu
value: 17.4
- name: chr-F
type: chrf
value: 0.42002
- task:
name: Translation ibo-fra
type: translation
args: ibo-fra
dataset:
name: ntrex128
type: ntrex128
args: ibo-fra
metrics:
- name: BLEU
type: bleu
value: 10.3
- name: chr-F
type: chrf
value: 0.34993
- task:
name: Translation ibo-spa
type: translation
args: ibo-spa
dataset:
name: ntrex128
type: ntrex128
args: ibo-spa
metrics:
- name: BLEU
type: bleu
value: 11.7
- name: chr-F
type: chrf
value: 0.36108
- task:
name: Translation kin-eng
type: translation
args: kin-eng
dataset:
name: ntrex128
type: ntrex128
args: kin-eng
metrics:
- name: BLEU
type: bleu
value: 16.9
- name: chr-F
type: chrf
value: 0.42892
- task:
name: Translation kin-fra
type: translation
args: kin-fra
dataset:
name: ntrex128
type: ntrex128
args: kin-fra
metrics:
- name: BLEU
type: bleu
value: 10.7
- name: chr-F
type: chrf
value: 0.35842
- task:
name: Translation kin-por
type: translation
args: kin-por
dataset:
name: ntrex128
type: ntrex128
args: kin-por
metrics:
- name: BLEU
type: bleu
value: 10.0
- name: chr-F
type: chrf
value: 0.34399
- task:
name: Translation kin-spa
type: translation
args: kin-spa
dataset:
name: ntrex128
type: ntrex128
args: kin-spa
metrics:
- name: BLEU
type: bleu
value: 12.5
- name: chr-F
type: chrf
value: 0.37224
- task:
name: Translation nde-eng
type: translation
args: nde-eng
dataset:
name: ntrex128
type: ntrex128
args: nde-eng
metrics:
- name: BLEU
type: bleu
value: 13.8
- name: chr-F
type: chrf
value: 0.39640
- task:
name: Translation nde-spa
type: translation
args: nde-spa
dataset:
name: ntrex128
type: ntrex128
args: nde-spa
metrics:
- name: BLEU
type: bleu
value: 10.0
- name: chr-F
type: chrf
value: 0.34638
- task:
name: Translation nso-eng
type: translation
args: nso-eng
dataset:
name: ntrex128
type: ntrex128
args: nso-eng
metrics:
- name: BLEU
type: bleu
value: 17.0
- name: chr-F
type: chrf
value: 0.42278
- task:
name: Translation nso-spa
type: translation
args: nso-spa
dataset:
name: ntrex128
type: ntrex128
args: nso-spa
metrics:
- name: BLEU
type: bleu
value: 11.3
- name: chr-F
type: chrf
value: 0.35027
- task:
name: Translation nya-eng
type: translation
args: nya-eng
dataset:
name: ntrex128
type: ntrex128
args: nya-eng
metrics:
- name: BLEU
type: bleu
value: 19.2
- name: chr-F
type: chrf
value: 0.42702
- task:
name: Translation nya-fra
type: translation
args: nya-fra
dataset:
name: ntrex128
type: ntrex128
args: nya-fra
metrics:
- name: BLEU
type: bleu
value: 10.7
- name: chr-F
type: chrf
value: 0.35503
- task:
name: Translation nya-por
type: translation
args: nya-por
dataset:
name: ntrex128
type: ntrex128
args: nya-por
metrics:
- name: BLEU
type: bleu
value: 10.4
- name: chr-F
type: chrf
value: 0.33912
- task:
name: Translation nya-spa
type: translation
args: nya-spa
dataset:
name: ntrex128
type: ntrex128
args: nya-spa
metrics:
- name: BLEU
type: bleu
value: 12.5
- name: chr-F
type: chrf
value: 0.36355
- task:
name: Translation ssw-eng
type: translation
args: ssw-eng
dataset:
name: ntrex128
type: ntrex128
args: ssw-eng
metrics:
- name: BLEU
type: bleu
value: 18.0
- name: chr-F
type: chrf
value: 0.43041
- task:
name: Translation ssw-spa
type: translation
args: ssw-spa
dataset:
name: ntrex128
type: ntrex128
args: ssw-spa
metrics:
- name: BLEU
type: bleu
value: 11.4
- name: chr-F
type: chrf
value: 0.35392
- task:
name: Translation swa-deu
type: translation
args: swa-deu
dataset:
name: ntrex128
type: ntrex128
args: swa-deu
metrics:
- name: BLEU
type: bleu
value: 11.2
- name: chr-F
type: chrf
value: 0.39475
- task:
name: Translation swa-eng
type: translation
args: swa-eng
dataset:
name: ntrex128
type: ntrex128
args: swa-eng
metrics:
- name: BLEU
type: bleu
value: 30.4
- name: chr-F
type: chrf
value: 0.54492
- task:
name: Translation swa-fra
type: translation
args: swa-fra
dataset:
name: ntrex128
type: ntrex128
args: swa-fra
metrics:
- name: BLEU
type: bleu
value: 15.6
- name: chr-F
type: chrf
value: 0.43008
- task:
name: Translation swa-por
type: translation
args: swa-por
dataset:
name: ntrex128
type: ntrex128
args: swa-por
metrics:
- name: BLEU
type: bleu
value: 15.4
- name: chr-F
type: chrf
value: 0.42343
- task:
name: Translation swa-spa
type: translation
args: swa-spa
dataset:
name: ntrex128
type: ntrex128
args: swa-spa
metrics:
- name: BLEU
type: bleu
value: 18.9
- name: chr-F
type: chrf
value: 0.44892
- task:
name: Translation tsn-eng
type: translation
args: tsn-eng
dataset:
name: ntrex128
type: ntrex128
args: tsn-eng
metrics:
- name: BLEU
type: bleu
value: 20.1
- name: chr-F
type: chrf
value: 0.44944
- task:
name: Translation tsn-fra
type: translation
args: tsn-fra
dataset:
name: ntrex128
type: ntrex128
args: tsn-fra
metrics:
- name: BLEU
type: bleu
value: 11.5
- name: chr-F
type: chrf
value: 0.36584
- task:
name: Translation tsn-por
type: translation
args: tsn-por
dataset:
name: ntrex128
type: ntrex128
args: tsn-por
metrics:
- name: BLEU
type: bleu
value: 11.1
- name: chr-F
type: chrf
value: 0.35774
- task:
name: Translation tsn-spa
type: translation
args: tsn-spa
dataset:
name: ntrex128
type: ntrex128
args: tsn-spa
metrics:
- name: BLEU
type: bleu
value: 13.8
- name: chr-F
type: chrf
value: 0.38149
- task:
name: Translation ven-eng
type: translation
args: ven-eng
dataset:
name: ntrex128
type: ntrex128
args: ven-eng
metrics:
- name: BLEU
type: bleu
value: 12.7
- name: chr-F
type: chrf
value: 0.36848
- task:
name: Translation xho-eng
type: translation
args: xho-eng
dataset:
name: ntrex128
type: ntrex128
args: xho-eng
metrics:
- name: BLEU
type: bleu
value: 21.8
- name: chr-F
type: chrf
value: 0.46636
- task:
name: Translation xho-fra
type: translation
args: xho-fra
dataset:
name: ntrex128
type: ntrex128
args: xho-fra
metrics:
- name: BLEU
type: bleu
value: 11.6
- name: chr-F
type: chrf
value: 0.36905
- task:
name: Translation xho-por
type: translation
args: xho-por
dataset:
name: ntrex128
type: ntrex128
args: xho-por
metrics:
- name: BLEU
type: bleu
value: 10.6
- name: chr-F
type: chrf
value: 0.35687
- task:
name: Translation xho-spa
type: translation
args: xho-spa
dataset:
name: ntrex128
type: ntrex128
args: xho-spa
metrics:
- name: BLEU
type: bleu
value: 13.3
- name: chr-F
type: chrf
value: 0.38176
- task:
name: Translation yor-eng
type: translation
args: yor-eng
dataset:
name: ntrex128
type: ntrex128
args: yor-eng
metrics:
- name: BLEU
type: bleu
value: 11.2
- name: chr-F
type: chrf
value: 0.34615
- task:
name: Translation zul-eng
type: translation
args: zul-eng
dataset:
name: ntrex128
type: ntrex128
args: zul-eng
metrics:
- name: BLEU
type: bleu
value: 21.9
- name: chr-F
type: chrf
value: 0.45848
- task:
name: Translation zul-fra
type: translation
args: zul-fra
dataset:
name: ntrex128
type: ntrex128
args: zul-fra
metrics:
- name: BLEU
type: bleu
value: 11.4
- name: chr-F
type: chrf
value: 0.36203
- task:
name: Translation zul-por
type: translation
args: zul-por
dataset:
name: ntrex128
type: ntrex128
args: zul-por
metrics:
- name: BLEU
type: bleu
value: 10.8
- name: chr-F
type: chrf
value: 0.35081
- task:
name: Translation zul-spa
type: translation
args: zul-spa
dataset:
name: ntrex128
type: ntrex128
args: zul-spa
metrics:
- name: BLEU
type: bleu
value: 13.0
- name: chr-F
type: chrf
value: 0.37270
- task:
name: Translation multi-multi
type: translation
args: multi-multi
dataset:
name: tatoeba-test-v2020-07-28-v2023-09-26
type: tatoeba_mt
args: multi-multi
metrics:
- name: BLEU
type: bleu
value: 27.3
- name: chr-F
type: chrf
value: 0.44751
- task:
name: Translation run-eng
type: translation
args: run-eng
dataset:
name: tatoeba-test-v2021-08-07
type: tatoeba_mt
args: run-eng
metrics:
- name: BLEU
type: bleu
value: 34.9
- name: chr-F
type: chrf
value: 0.49949
- task:
name: Translation run-fra
type: translation
args: run-fra
dataset:
name: tatoeba-test-v2021-08-07
type: tatoeba_mt
args: run-fra
metrics:
- name: BLEU
type: bleu
value: 22.4
- name: chr-F
type: chrf
value: 0.41431
- task:
name: Translation swa-eng
type: translation
args: swa-eng
dataset:
name: tatoeba-test-v2021-08-07
type: tatoeba_mt
args: swa-eng
metrics:
- name: BLEU
type: bleu
value: 41.5
- name: chr-F
type: chrf
value: 0.57031
- task:
name: Translation xho-eng
type: translation
args: xho-eng
dataset:
name: tatoeba-test-v2021-03-30
type: tatoeba_mt
args: xho-eng
metrics:
- name: BLEU
type: bleu
value: 36.4
- name: chr-F
type: chrf
value: 0.51250
- task:
name: Translation zul-eng
type: translation
args: zul-eng
dataset:
name: tico19-test
type: tico19-test
args: zul-eng
metrics:
- name: BLEU
type: bleu
value: 25.2
- name: chr-F
type: chrf
value: 0.48762
- task:
name: Translation zul-fra
type: translation
args: zul-fra
dataset:
name: tico19-test
type: tico19-test
args: zul-fra
metrics:
- name: BLEU
type: bleu
value: 13.7
- name: chr-F
type: chrf
value: 0.37823
- task:
name: Translation zul-por
type: translation
args: zul-por
dataset:
name: tico19-test
type: tico19-test
args: zul-por
metrics:
- name: BLEU
type: bleu
value: 14.0
- name: chr-F
type: chrf
value: 0.38586
- task:
name: Translation zul-spa
type: translation
args: zul-spa
dataset:
name: tico19-test
type: tico19-test
args: zul-spa
metrics:
- name: BLEU
type: bleu
value: 15.9
- name: chr-F
type: chrf
value: 0.40041
---
# opus-mt-tc-bible-big-alv-deu_eng_fra_por_spa
## Table of Contents
- [Model Details](#model-details)
- [Uses](#uses)
- [Risks, Limitations and Biases](#risks-limitations-and-biases)
- [How to Get Started With the Model](#how-to-get-started-with-the-model)
- [Training](#training)
- [Evaluation](#evaluation)
- [Citation Information](#citation-information)
- [Acknowledgements](#acknowledgements)
## Model Details
Neural machine translation model for translating from Atlantic-Congo languages (alv) to German, English, French, Portuguese and Spanish (deu+eng+fra+por+spa).
This model is part of the [OPUS-MT project](https://github.com/Helsinki-NLP/Opus-MT), an effort to make neural machine translation models widely available and accessible for many languages in the world. All models are originally trained using the amazing framework of [Marian NMT](https://marian-nmt.github.io/), an efficient NMT implementation written in pure C++. The models have been converted to PyTorch using the transformers library by Hugging Face. Training data is taken from [OPUS](https://opus.nlpl.eu/) and training pipelines use the procedures of [OPUS-MT-train](https://github.com/Helsinki-NLP/Opus-MT-train).
**Model Description:**
- **Developed by:** Language Technology Research Group at the University of Helsinki
- **Model Type:** Translation (transformer-big)
- **Release**: 2024-05-30
- **License:** Apache-2.0
- **Language(s):**
- Source Language(s): abi acd ade adj aka akp ann anv atg avn bas bav bba beh bem bfd bfo bim biv bkv blh bmq bmv bom bov box bqj bss btt bud bwu cce cjk cko cme csk cwe cwt dag dga dgi dig dop dug dyi dyo efi ewe fal fon fuc ful gej gkn gng gog gud gur guw gux gwr hag hay heh her ibo ife iri izr jbu jmc kam kbp kdc kdl kdn ken keu kez kia kik kin kki kkj kma kmb kon ksb ktj kua kub kus kyf las lee lef lem lia lin lip lob lon lua lug luy maw mcp mcu mda mfq mgo mnf mnh mor mos muh myk myx mzk mzm mzw nbl ncu nde ndo ndz nfr nhu nim nin nmz nnb nnh nnw nso ntm ntr nuj nwb nya nyf nyn nyo nyy nzi oku old ozm pai pbl pkb rim run sag seh sig sil sld sna snw sot soy spp ssw suk swa swc swh sxb tbz tem thk tik tlj toh toi tpm tsn tso tsw tum twi umb vag ven vmw vun wmw wob wol xho xog xon xrb xsm xuo yam yaz yor zul
- Target Language(s): deu eng fra por spa
- Valid Target Language Labels: >>deu<< >>eng<< >>fra<< >>por<< >>spa<< >>xxx<<
- **Original Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/alv-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
- **Resources for more information:**
- [OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/alv-deu%2Beng%2Bfra%2Bpor%2Bspa/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
- [OPUS-MT-train GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
- [More information about MarianNMT models in the transformers library](https://huggingface.co/docs/transformers/model_doc/marian)
- [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge/)
- [HPLT bilingual data v1 (as part of the Tatoeba Translation Challenge dataset)](https://hplt-project.org/datasets/v1)
- [A massively parallel Bible corpus](https://aclanthology.org/L14-1215/)
This is a multilingual translation model with multiple target languages. A sentence-initial language token is required in the form of `>>id<<` (id = valid target language ID), e.g. `>>deu<<`.
## Uses
This model can be used for translation and text-to-text generation.
## Risks, Limitations and Biases
**CONTENT WARNING: Readers should be aware that the model is trained on various public data sets that may contain content that is disturbing, offensive, and can propagate historical and current stereotypes.**
Significant research has explored bias and fairness issues with language models (see, e.g., [Sheng et al. (2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)).
## How to Get Started With the Model
A short example code:
```python
from transformers import MarianMTModel, MarianTokenizer
# Each input sentence must begin with a target-language token of the form
# >>id<< (e.g. >>deu<<) so the model knows which language to translate into.
src_text = [
    ">>deu<< Replace this with text in an accepted source language.",
    ">>spa<< This is the second sentence.",
]

# Hugging Face Hub identifier of this model.
model_name = "Helsinki-NLP/opus-mt-tc-bible-big-alv-deu_eng_fra_por_spa"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# Tokenize the whole batch with padding and generate the translations.
translated = model.generate(**tokenizer(src_text, return_tensors="pt", padding=True))

# Decode each generated sequence back to plain text, dropping special tokens.
for t in translated:
    print(tokenizer.decode(t, skip_special_tokens=True))
```
You can also use OPUS-MT models with the transformers pipelines, for example:
```python
from transformers import pipeline
# Build a translation pipeline backed by this model.
translator = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-tc-bible-big-alv-deu_eng_fra_por_spa",
)

# The input starts with the >>id<< token that selects the target language.
result = translator(">>deu<< Replace this with text in an accepted source language.")
print(result)
```
## Training
- **Data**: opusTCv20230926max50+bt+jhubc ([source](https://github.com/Helsinki-NLP/Tatoeba-Challenge))
- **Pre-processing**: SentencePiece (spm32k,spm32k)
- **Model Type:** transformer-big
- **Original MarianNMT Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/alv-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
- **Training Scripts**: [GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
## Evaluation
* [Model scores at the OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/alv-deu%2Beng%2Bfra%2Bpor%2Bspa/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
* test set translations: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/alv-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt)
* test set scores: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/alv-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt)
* benchmark results: [benchmark_results.txt](benchmark_results.txt)
* benchmark output: [benchmark_translations.zip](benchmark_translations.zip)
| langpair | testset | chr-F | BLEU | #sent | #words |
|----------|---------|-------|-------|-------|--------|
| run-eng | tatoeba-test-v2021-08-07 | 0.49949 | 34.9 | 1703 | 10041 |
| run-fra | tatoeba-test-v2021-08-07 | 0.41431 | 22.4 | 1274 | 7479 |
| swa-eng | tatoeba-test-v2021-08-07 | 0.57031 | 41.5 | 387 | 2508 |
| swh-por | flores101-devtest | 0.40847 | 14.7 | 1012 | 26519 |
| kin-eng | flores200-devtest | 0.41964 | 18.1 | 1012 | 24721 |
| nso-eng | flores200-devtest | 0.45662 | 22.3 | 1012 | 24721 |
| sna-eng | flores200-devtest | 0.41974 | 17.2 | 1012 | 24721 |
| sot-eng | flores200-devtest | 0.45415 | 20.7 | 1012 | 24721 |
| swh-eng | flores200-devtest | 0.54048 | 29.1 | 1012 | 24721 |
| swh-fra | flores200-devtest | 0.44837 | 18.2 | 1012 | 28343 |
| swh-por | flores200-devtest | 0.44062 | 17.6 | 1012 | 26519 |
| tsn-eng | flores200-devtest | 0.40410 | 15.3 | 1012 | 24721 |
| tso-eng | flores200-devtest | 0.41504 | 17.6 | 1012 | 24721 |
| xho-eng | flores200-devtest | 0.47667 | 23.7 | 1012 | 24721 |
| zul-eng | flores200-devtest | 0.47798 | 23.4 | 1012 | 24721 |
| ibo-eng | ntrex128 | 0.42002 | 17.4 | 1997 | 47673 |
| kin-eng | ntrex128 | 0.42892 | 16.9 | 1997 | 47673 |
| nso-eng | ntrex128 | 0.42278 | 17.0 | 1997 | 47673 |
| nya-eng | ntrex128 | 0.42702 | 19.2 | 1997 | 47673 |
| ssw-eng | ntrex128 | 0.43041 | 18.0 | 1997 | 47673 |
| swa-eng | ntrex128 | 0.54492 | 30.4 | 1997 | 47673 |
| swa-fra | ntrex128 | 0.43008 | 15.6 | 1997 | 53481 |
| swa-por | ntrex128 | 0.42343 | 15.4 | 1997 | 51631 |
| swa-spa | ntrex128 | 0.44892 | 18.9 | 1997 | 54107 |
| tsn-eng | ntrex128 | 0.44944 | 20.1 | 1997 | 47673 |
| xho-eng | ntrex128 | 0.46636 | 21.8 | 1997 | 47673 |
| zul-eng | ntrex128 | 0.45848 | 21.9 | 1997 | 47673 |
| zul-eng | tico19-test | 0.48762 | 25.2 | 2100 | 56804 |
| zul-spa | tico19-test | 0.40041 | 15.9 | 2100 | 66563 |
## Citation Information
* Publications: [Democratizing neural machine translation with OPUS-MT](https://doi.org/10.1007/s10579-023-09704-w) and [OPUS-MT – Building open translation services for the World](https://aclanthology.org/2020.eamt-1.61/) and [The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual MT](https://aclanthology.org/2020.wmt-1.139/) (Please cite these publications if you use this model.)
```bibtex
@article{tiedemann2023democratizing,
title={Democratizing neural machine translation with {OPUS-MT}},
author={Tiedemann, J{\"o}rg and Aulamo, Mikko and Bakshandaeva, Daria and Boggia, Michele and Gr{\"o}nroos, Stig-Arne and Nieminen, Tommi and Raganato, Alessandro and Scherrer, Yves and Vazquez, Raul and Virpioja, Sami},
journal={Language Resources and Evaluation},
volume={58},
pages={713--755},
year={2023},
publisher={Springer Nature},
issn={1574-0218},
doi={10.1007/s10579-023-09704-w}
}
@inproceedings{tiedemann-thottingal-2020-opus,
title = "{OPUS}-{MT} {--} Building open translation services for the World",
author = {Tiedemann, J{\"o}rg and Thottingal, Santhosh},
booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
month = nov,
year = "2020",
address = "Lisboa, Portugal",
publisher = "European Association for Machine Translation",
url = "https://aclanthology.org/2020.eamt-1.61",
pages = "479--480",
}
@inproceedings{tiedemann-2020-tatoeba,
title = "The Tatoeba Translation Challenge {--} Realistic Data Sets for Low Resource and Multilingual {MT}",
author = {Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wmt-1.139",
pages = "1174--1182",
}
```
## Acknowledgements
The work is supported by the [HPLT project](https://hplt-project.org/), funded by the European Union’s Horizon Europe research and innovation programme under grant agreement No 101070350. We are also grateful for the generous computational resources and IT infrastructure provided by [CSC -- IT Center for Science](https://www.csc.fi/), Finland, and the [EuroHPC supercomputer LUMI](https://www.lumi-supercomputer.eu/).
## Model conversion info
* transformers version: 4.45.1
* OPUS-MT git hash: a0ea3b3
* port time: Mon Oct 7 17:13:22 EEST 2024
* port machine: LM0-400-22516.local