--- library_name: transformers language: - da - de - en - es - fo - fr - is - nb - nn - "no" - non - pt - sv tags: - translation - opus-mt-tc-bible license: apache-2.0 model-index: - name: opus-mt-tc-bible-big-gmq-deu_eng_fra_por_spa results: - task: name: Translation dan-deu type: translation args: dan-deu dataset: name: flores200-devtest type: flores200-devtest args: dan-deu metrics: - name: BLEU type: bleu value: 32.3 - name: chr-F type: chrf value: 0.60897 - task: name: Translation dan-eng type: translation args: dan-eng dataset: name: flores200-devtest type: flores200-devtest args: dan-eng metrics: - name: BLEU type: bleu value: 48.2 - name: chr-F type: chrf value: 0.71641 - task: name: Translation dan-fra type: translation args: dan-fra dataset: name: flores200-devtest type: flores200-devtest args: dan-fra metrics: - name: BLEU type: bleu value: 38.9 - name: chr-F type: chrf value: 0.63777 - task: name: Translation dan-por type: translation args: dan-por dataset: name: flores200-devtest type: flores200-devtest args: dan-por metrics: - name: BLEU type: bleu value: 36.7 - name: chr-F type: chrf value: 0.62302 - task: name: Translation dan-spa type: translation args: dan-spa dataset: name: flores200-devtest type: flores200-devtest args: dan-spa metrics: - name: BLEU type: bleu value: 24.4 - name: chr-F type: chrf value: 0.52803 - task: name: Translation fao-deu type: translation args: fao-deu dataset: name: flores200-devtest type: flores200-devtest args: fao-deu metrics: - name: BLEU type: bleu value: 16.0 - name: chr-F type: chrf value: 0.41184 - task: name: Translation fao-eng type: translation args: fao-eng dataset: name: flores200-devtest type: flores200-devtest args: fao-eng metrics: - name: BLEU type: bleu value: 21.2 - name: chr-F type: chrf value: 0.43308 - task: name: Translation fao-fra type: translation args: fao-fra dataset: name: flores200-devtest type: flores200-devtest args: fao-fra metrics: - name: BLEU type: bleu value: 16.7 - name: chr-F type: 
chrf value: 0.39253 - task: name: Translation fao-por type: translation args: fao-por dataset: name: flores200-devtest type: flores200-devtest args: fao-por metrics: - name: BLEU type: bleu value: 19.0 - name: chr-F type: chrf value: 0.42649 - task: name: Translation fao-spa type: translation args: fao-spa dataset: name: flores200-devtest type: flores200-devtest args: fao-spa metrics: - name: BLEU type: bleu value: 14.1 - name: chr-F type: chrf value: 0.38131 - task: name: Translation isl-deu type: translation args: isl-deu dataset: name: flores200-devtest type: flores200-devtest args: isl-deu metrics: - name: BLEU type: bleu value: 22.7 - name: chr-F type: chrf value: 0.51165 - task: name: Translation isl-eng type: translation args: isl-eng dataset: name: flores200-devtest type: flores200-devtest args: isl-eng metrics: - name: BLEU type: bleu value: 32.2 - name: chr-F type: chrf value: 0.57745 - task: name: Translation isl-fra type: translation args: isl-fra dataset: name: flores200-devtest type: flores200-devtest args: isl-fra metrics: - name: BLEU type: bleu value: 27.6 - name: chr-F type: chrf value: 0.54210 - task: name: Translation isl-por type: translation args: isl-por dataset: name: flores200-devtest type: flores200-devtest args: isl-por metrics: - name: BLEU type: bleu value: 26.1 - name: chr-F type: chrf value: 0.52479 - task: name: Translation isl-spa type: translation args: isl-spa dataset: name: flores200-devtest type: flores200-devtest args: isl-spa metrics: - name: BLEU type: bleu value: 19.2 - name: chr-F type: chrf value: 0.46837 - task: name: Translation nno-deu type: translation args: nno-deu dataset: name: flores200-devtest type: flores200-devtest args: nno-deu metrics: - name: BLEU type: bleu value: 29.2 - name: chr-F type: chrf value: 0.58054 - task: name: Translation nno-eng type: translation args: nno-eng dataset: name: flores200-devtest type: flores200-devtest args: nno-eng metrics: - name: BLEU type: bleu value: 45.0 - name: chr-F type: 
chrf value: 0.69114 - task: name: Translation nno-fra type: translation args: nno-fra dataset: name: flores200-devtest type: flores200-devtest args: nno-fra metrics: - name: BLEU type: bleu value: 36.0 - name: chr-F type: chrf value: 0.61334 - task: name: Translation nno-por type: translation args: nno-por dataset: name: flores200-devtest type: flores200-devtest args: nno-por metrics: - name: BLEU type: bleu value: 34.1 - name: chr-F type: chrf value: 0.60055 - task: name: Translation nno-spa type: translation args: nno-spa dataset: name: flores200-devtest type: flores200-devtest args: nno-spa metrics: - name: BLEU type: bleu value: 22.8 - name: chr-F type: chrf value: 0.51190 - task: name: Translation nob-deu type: translation args: nob-deu dataset: name: flores200-devtest type: flores200-devtest args: nob-deu metrics: - name: BLEU type: bleu value: 27.6 - name: chr-F type: chrf value: 0.57023 - task: name: Translation nob-eng type: translation args: nob-eng dataset: name: flores200-devtest type: flores200-devtest args: nob-eng metrics: - name: BLEU type: bleu value: 43.1 - name: chr-F type: chrf value: 0.67540 - task: name: Translation nob-fra type: translation args: nob-fra dataset: name: flores200-devtest type: flores200-devtest args: nob-fra metrics: - name: BLEU type: bleu value: 34.2 - name: chr-F type: chrf value: 0.60568 - task: name: Translation nob-por type: translation args: nob-por dataset: name: flores200-devtest type: flores200-devtest args: nob-por metrics: - name: BLEU type: bleu value: 32.8 - name: chr-F type: chrf value: 0.59466 - task: name: Translation nob-spa type: translation args: nob-spa dataset: name: flores200-devtest type: flores200-devtest args: nob-spa metrics: - name: BLEU type: bleu value: 22.4 - name: chr-F type: chrf value: 0.51138 - task: name: Translation swe-deu type: translation args: swe-deu dataset: name: flores200-devtest type: flores200-devtest args: swe-deu metrics: - name: BLEU type: bleu value: 32.6 - name: chr-F type: 
chrf value: 0.60630 - task: name: Translation swe-eng type: translation args: swe-eng dataset: name: flores200-devtest type: flores200-devtest args: swe-eng metrics: - name: BLEU type: bleu value: 48.1 - name: chr-F type: chrf value: 0.70584 - task: name: Translation swe-fra type: translation args: swe-fra dataset: name: flores200-devtest type: flores200-devtest args: swe-fra metrics: - name: BLEU type: bleu value: 39.1 - name: chr-F type: chrf value: 0.63608 - task: name: Translation swe-por type: translation args: swe-por dataset: name: flores200-devtest type: flores200-devtest args: swe-por metrics: - name: BLEU type: bleu value: 36.4 - name: chr-F type: chrf value: 0.62046 - task: name: Translation swe-spa type: translation args: swe-spa dataset: name: flores200-devtest type: flores200-devtest args: swe-spa metrics: - name: BLEU type: bleu value: 23.9 - name: chr-F type: chrf value: 0.52328 - task: name: Translation dan-eng type: translation args: dan-eng dataset: name: flores101-devtest type: flores_101 args: dan eng devtest metrics: - name: BLEU type: bleu value: 47.6 - name: chr-F type: chrf value: 0.71193 - task: name: Translation dan-fra type: translation args: dan-fra dataset: name: flores101-devtest type: flores_101 args: dan fra devtest metrics: - name: BLEU type: bleu value: 38.1 - name: chr-F type: chrf value: 0.63349 - task: name: Translation dan-por type: translation args: dan-por dataset: name: flores101-devtest type: flores_101 args: dan por devtest metrics: - name: BLEU type: bleu value: 36.2 - name: chr-F type: chrf value: 0.62063 - task: name: Translation dan-spa type: translation args: dan-spa dataset: name: flores101-devtest type: flores_101 args: dan spa devtest metrics: - name: BLEU type: bleu value: 24.2 - name: chr-F type: chrf value: 0.52557 - task: name: Translation isl-deu type: translation args: isl-deu dataset: name: flores101-devtest type: flores_101 args: isl deu devtest metrics: - name: BLEU type: bleu value: 22.2 - name: chr-F 
type: chrf value: 0.50581 - task: name: Translation isl-eng type: translation args: isl-eng dataset: name: flores101-devtest type: flores_101 args: isl eng devtest metrics: - name: BLEU type: bleu value: 31.6 - name: chr-F type: chrf value: 0.57294 - task: name: Translation isl-por type: translation args: isl-por dataset: name: flores101-devtest type: flores_101 args: isl por devtest metrics: - name: BLEU type: bleu value: 25.8 - name: chr-F type: chrf value: 0.52192 - task: name: Translation isl-spa type: translation args: isl-spa dataset: name: flores101-devtest type: flores_101 args: isl spa devtest metrics: - name: BLEU type: bleu value: 18.5 - name: chr-F type: chrf value: 0.46364 - task: name: Translation nob-eng type: translation args: nob-eng dataset: name: flores101-devtest type: flores_101 args: nob eng devtest metrics: - name: BLEU type: bleu value: 42.6 - name: chr-F type: chrf value: 0.67120 - task: name: Translation nob-fra type: translation args: nob-fra dataset: name: flores101-devtest type: flores_101 args: nob fra devtest metrics: - name: BLEU type: bleu value: 33.9 - name: chr-F type: chrf value: 0.60289 - task: name: Translation nob-spa type: translation args: nob-spa dataset: name: flores101-devtest type: flores_101 args: nob spa devtest metrics: - name: BLEU type: bleu value: 21.9 - name: chr-F type: chrf value: 0.50848 - task: name: Translation swe-deu type: translation args: swe-deu dataset: name: flores101-devtest type: flores_101 args: swe deu devtest metrics: - name: BLEU type: bleu value: 32.2 - name: chr-F type: chrf value: 0.60306 - task: name: Translation swe-eng type: translation args: swe-eng dataset: name: flores101-devtest type: flores_101 args: swe eng devtest metrics: - name: BLEU type: bleu value: 47.9 - name: chr-F type: chrf value: 0.70404 - task: name: Translation swe-por type: translation args: swe-por dataset: name: flores101-devtest type: flores_101 args: swe por devtest metrics: - name: BLEU type: bleu value: 35.7 - 
name: chr-F type: chrf value: 0.61418 - task: name: Translation dan-deu type: translation args: dan-deu dataset: name: ntrex128 type: ntrex128 args: dan-deu metrics: - name: BLEU type: bleu value: 25.3 - name: chr-F type: chrf value: 0.54229 - task: name: Translation dan-eng type: translation args: dan-eng dataset: name: ntrex128 type: ntrex128 args: dan-eng metrics: - name: BLEU type: bleu value: 38.7 - name: chr-F type: chrf value: 0.63083 - task: name: Translation dan-fra type: translation args: dan-fra dataset: name: ntrex128 type: ntrex128 args: dan-fra metrics: - name: BLEU type: bleu value: 26.2 - name: chr-F type: chrf value: 0.54088 - task: name: Translation dan-por type: translation args: dan-por dataset: name: ntrex128 type: ntrex128 args: dan-por metrics: - name: BLEU type: bleu value: 27.0 - name: chr-F type: chrf value: 0.53626 - task: name: Translation dan-spa type: translation args: dan-spa dataset: name: ntrex128 type: ntrex128 args: dan-spa metrics: - name: BLEU type: bleu value: 30.8 - name: chr-F type: chrf value: 0.56217 - task: name: Translation fao-deu type: translation args: fao-deu dataset: name: ntrex128 type: ntrex128 args: fao-deu metrics: - name: BLEU type: bleu value: 16.4 - name: chr-F type: chrf value: 0.41701 - task: name: Translation fao-eng type: translation args: fao-eng dataset: name: ntrex128 type: ntrex128 args: fao-eng metrics: - name: BLEU type: bleu value: 25.3 - name: chr-F type: chrf value: 0.47105 - task: name: Translation fao-fra type: translation args: fao-fra dataset: name: ntrex128 type: ntrex128 args: fao-fra metrics: - name: BLEU type: bleu value: 16.3 - name: chr-F type: chrf value: 0.40070 - task: name: Translation fao-por type: translation args: fao-por dataset: name: ntrex128 type: ntrex128 args: fao-por metrics: - name: BLEU type: bleu value: 18.0 - name: chr-F type: chrf value: 0.42005 - task: name: Translation fao-spa type: translation args: fao-spa dataset: name: ntrex128 type: ntrex128 args: fao-spa 
metrics: - name: BLEU type: bleu value: 20.5 - name: chr-F type: chrf value: 0.44085 - task: name: Translation isl-deu type: translation args: isl-deu dataset: name: ntrex128 type: ntrex128 args: isl-deu metrics: - name: BLEU type: bleu value: 20.5 - name: chr-F type: chrf value: 0.49932 - task: name: Translation isl-eng type: translation args: isl-eng dataset: name: ntrex128 type: ntrex128 args: isl-eng metrics: - name: BLEU type: bleu value: 29.7 - name: chr-F type: chrf value: 0.56856 - task: name: Translation isl-fra type: translation args: isl-fra dataset: name: ntrex128 type: ntrex128 args: isl-fra metrics: - name: BLEU type: bleu value: 24.6 - name: chr-F type: chrf value: 0.51998 - task: name: Translation isl-por type: translation args: isl-por dataset: name: ntrex128 type: ntrex128 args: isl-por metrics: - name: BLEU type: bleu value: 21.7 - name: chr-F type: chrf value: 0.49903 - task: name: Translation isl-spa type: translation args: isl-spa dataset: name: ntrex128 type: ntrex128 args: isl-spa metrics: - name: BLEU type: bleu value: 27.1 - name: chr-F type: chrf value: 0.53171 - task: name: Translation nno-deu type: translation args: nno-deu dataset: name: ntrex128 type: ntrex128 args: nno-deu metrics: - name: BLEU type: bleu value: 24.4 - name: chr-F type: chrf value: 0.53000 - task: name: Translation nno-eng type: translation args: nno-eng dataset: name: ntrex128 type: ntrex128 args: nno-eng metrics: - name: BLEU type: bleu value: 42.9 - name: chr-F type: chrf value: 0.65866 - task: name: Translation nno-fra type: translation args: nno-fra dataset: name: ntrex128 type: ntrex128 args: nno-fra metrics: - name: BLEU type: bleu value: 27.5 - name: chr-F type: chrf value: 0.54339 - task: name: Translation nno-por type: translation args: nno-por dataset: name: ntrex128 type: ntrex128 args: nno-por metrics: - name: BLEU type: bleu value: 26.3 - name: chr-F type: chrf value: 0.53242 - task: name: Translation nno-spa type: translation args: nno-spa dataset: 
name: ntrex128 type: ntrex128 args: nno-spa metrics: - name: BLEU type: bleu value: 30.4 - name: chr-F type: chrf value: 0.55889 - task: name: Translation nob-deu type: translation args: nob-deu dataset: name: ntrex128 type: ntrex128 args: nob-deu metrics: - name: BLEU type: bleu value: 26.8 - name: chr-F type: chrf value: 0.55549 - task: name: Translation nob-eng type: translation args: nob-eng dataset: name: ntrex128 type: ntrex128 args: nob-eng metrics: - name: BLEU type: bleu value: 40.9 - name: chr-F type: chrf value: 0.65580 - task: name: Translation nob-fra type: translation args: nob-fra dataset: name: ntrex128 type: ntrex128 args: nob-fra metrics: - name: BLEU type: bleu value: 29.2 - name: chr-F type: chrf value: 0.56187 - task: name: Translation nob-por type: translation args: nob-por dataset: name: ntrex128 type: ntrex128 args: nob-por metrics: - name: BLEU type: bleu value: 26.6 - name: chr-F type: chrf value: 0.54392 - task: name: Translation nob-spa type: translation args: nob-spa dataset: name: ntrex128 type: ntrex128 args: nob-spa metrics: - name: BLEU type: bleu value: 32.6 - name: chr-F type: chrf value: 0.57998 - task: name: Translation swe-deu type: translation args: swe-deu dataset: name: ntrex128 type: ntrex128 args: swe-deu metrics: - name: BLEU type: bleu value: 26.7 - name: chr-F type: chrf value: 0.55549 - task: name: Translation swe-eng type: translation args: swe-eng dataset: name: ntrex128 type: ntrex128 args: swe-eng metrics: - name: BLEU type: bleu value: 42.2 - name: chr-F type: chrf value: 0.66348 - task: name: Translation swe-fra type: translation args: swe-fra dataset: name: ntrex128 type: ntrex128 args: swe-fra metrics: - name: BLEU type: bleu value: 29.0 - name: chr-F type: chrf value: 0.56310 - task: name: Translation swe-por type: translation args: swe-por dataset: name: ntrex128 type: ntrex128 args: swe-por metrics: - name: BLEU type: bleu value: 27.8 - name: chr-F type: chrf value: 0.54965 - task: name: Translation swe-spa 
type: translation args: swe-spa dataset: name: ntrex128 type: ntrex128 args: swe-spa metrics: - name: BLEU type: bleu value: 32.8 - name: chr-F type: chrf value: 0.58035 - task: name: Translation dan-deu type: translation args: dan-deu dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: dan-deu metrics: - name: BLEU type: bleu value: 56.7 - name: chr-F type: chrf value: 0.74460 - task: name: Translation dan-eng type: translation args: dan-eng dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: dan-eng metrics: - name: BLEU type: bleu value: 64.3 - name: chr-F type: chrf value: 0.77233 - task: name: Translation dan-fra type: translation args: dan-fra dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: dan-fra metrics: - name: BLEU type: bleu value: 60.8 - name: chr-F type: chrf value: 0.76425 - task: name: Translation dan-por type: translation args: dan-por dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: dan-por metrics: - name: BLEU type: bleu value: 60.0 - name: chr-F type: chrf value: 0.77248 - task: name: Translation dan-spa type: translation args: dan-spa dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: dan-spa metrics: - name: BLEU type: bleu value: 54.9 - name: chr-F type: chrf value: 0.72567 - task: name: Translation fao-eng type: translation args: fao-eng dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: fao-eng metrics: - name: BLEU type: bleu value: 39.6 - name: chr-F type: chrf value: 0.54571 - task: name: Translation isl-deu type: translation args: isl-deu dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: isl-deu metrics: - name: BLEU type: bleu value: 51.4 - name: chr-F type: chrf value: 0.68535 - task: name: Translation isl-eng type: translation args: isl-eng dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: isl-eng metrics: - name: BLEU type: bleu value: 51.7 - name: chr-F type: chrf value: 0.67066 - task: name: Translation isl-spa type: translation 
args: isl-spa dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: isl-spa metrics: - name: BLEU type: bleu value: 48.5 - name: chr-F type: chrf value: 0.65659 - task: name: Translation multi-multi type: translation args: multi-multi dataset: name: tatoeba-test-v2020-07-28-v2023-09-26 type: tatoeba_mt args: multi-multi metrics: - name: BLEU type: bleu value: 58.2 - name: chr-F type: chrf value: 0.73325 - task: name: Translation nno-eng type: translation args: nno-eng dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: nno-eng metrics: - name: BLEU type: bleu value: 55.5 - name: chr-F type: chrf value: 0.69415 - task: name: Translation nob-deu type: translation args: nob-deu dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: nob-deu metrics: - name: BLEU type: bleu value: 50.5 - name: chr-F type: chrf value: 0.69862 - task: name: Translation nob-eng type: translation args: nob-eng dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: nob-eng metrics: - name: BLEU type: bleu value: 59.2 - name: chr-F type: chrf value: 0.72912 - task: name: Translation nob-fra type: translation args: nob-fra dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: nob-fra metrics: - name: BLEU type: bleu value: 52.5 - name: chr-F type: chrf value: 0.71392 - task: name: Translation nob-spa type: translation args: nob-spa dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: nob-spa metrics: - name: BLEU type: bleu value: 55.1 - name: chr-F type: chrf value: 0.73300 - task: name: Translation nor-deu type: translation args: nor-deu dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: nor-deu metrics: - name: BLEU type: bleu value: 50.7 - name: chr-F type: chrf value: 0.69923 - task: name: Translation nor-eng type: translation args: nor-eng dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: nor-eng metrics: - name: BLEU type: bleu value: 58.8 - name: chr-F type: chrf value: 0.72587 - task: name: Translation 
nor-fra type: translation args: nor-fra dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: nor-fra metrics: - name: BLEU type: bleu value: 55.1 - name: chr-F type: chrf value: 0.73052 - task: name: Translation nor-por type: translation args: nor-por dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: nor-por metrics: - name: BLEU type: bleu value: 45.4 - name: chr-F type: chrf value: 0.67948 - task: name: Translation nor-spa type: translation args: nor-spa dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: nor-spa metrics: - name: BLEU type: bleu value: 55.3 - name: chr-F type: chrf value: 0.73320 - task: name: Translation swe-deu type: translation args: swe-deu dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: swe-deu metrics: - name: BLEU type: bleu value: 55.4 - name: chr-F type: chrf value: 0.71816 - task: name: Translation swe-eng type: translation args: swe-eng dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: swe-eng metrics: - name: BLEU type: bleu value: 64.8 - name: chr-F type: chrf value: 0.76648 - task: name: Translation swe-fra type: translation args: swe-fra dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: swe-fra metrics: - name: BLEU type: bleu value: 57.4 - name: chr-F type: chrf value: 0.72847 - task: name: Translation swe-por type: translation args: swe-por dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: swe-por metrics: - name: BLEU type: bleu value: 50.3 - name: chr-F type: chrf value: 0.70554 - task: name: Translation swe-spa type: translation args: swe-spa dataset: name: tatoeba-test-v2021-08-07 type: tatoeba_mt args: swe-spa metrics: - name: BLEU type: bleu value: 54.3 - name: chr-F type: chrf value: 0.70926 - task: name: Translation isl-eng type: translation args: isl-eng dataset: name: newstest2021 type: wmt-2021-news args: isl-eng metrics: - name: BLEU type: bleu value: 32.4 - name: chr-F type: chrf value: 0.56364 --- # 
opus-mt-tc-bible-big-gmq-deu_eng_fra_por_spa ## Table of Contents - [Model Details](#model-details) - [Uses](#uses) - [Risks, Limitations and Biases](#risks-limitations-and-biases) - [How to Get Started With the Model](#how-to-get-started-with-the-model) - [Training](#training) - [Evaluation](#evaluation) - [Citation Information](#citation-information) - [Acknowledgements](#acknowledgements) ## Model Details Neural machine translation model for translating from North Germanic languages (gmq) to German, English, French, Portuguese, and Spanish (deu+eng+fra+por+spa). This model is part of the [OPUS-MT project](https://github.com/Helsinki-NLP/Opus-MT), an effort to make neural machine translation models widely available and accessible for many languages in the world. All models are originally trained using the amazing framework of [Marian NMT](https://marian-nmt.github.io/), an efficient NMT implementation written in pure C++. The models have been converted to PyTorch using the transformers library by Hugging Face. Training data is taken from [OPUS](https://opus.nlpl.eu/) and training pipelines use the procedures of [OPUS-MT-train](https://github.com/Helsinki-NLP/Opus-MT-train). 
**Model Description:** - **Developed by:** Language Technology Research Group at the University of Helsinki - **Model Type:** Translation (transformer-big) - **Release**: 2024-05-30 - **License:** Apache-2.0 - **Language(s):** - Source Language(s): dan fao isl nno nob non nor swe - Target Language(s): deu eng fra por spa - Valid Target Language Labels: >>deu<< >>eng<< >>fra<< >>por<< >>spa<< >>xxx<< - **Original Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/gmq-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip) - **Resources for more information:** - [OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/gmq-deu%2Beng%2Bfra%2Bpor%2Bspa/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30) - [OPUS-MT-train GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train) - [More information about MarianNMT models in the transformers library](https://huggingface.co/docs/transformers/model_doc/marian) - [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge/) - [HPLT bilingual data v1 (as part of the Tatoeba Translation Challenge dataset)](https://hplt-project.org/datasets/v1) - [A massively parallel Bible corpus](https://aclanthology.org/L14-1215/) This is a multilingual translation model with multiple target languages. A sentence-initial language token is required in the form of `>>id<<` (id = valid target language ID), e.g. `>>deu<<`. ## Uses This model can be used for translation and text-to-text generation. ## Risks, Limitations and Biases **CONTENT WARNING: Readers should be aware that the model is trained on various public data sets that may contain content that is disturbing, offensive, and can propagate historical and current stereotypes.** Significant research has explored bias and fairness issues with language models (see, e.g., [Sheng et al. 
(2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)). ## How to Get Started With the Model A short code example: ```python from transformers import MarianMTModel, MarianTokenizer src_text = [ ">>deu<< Replace this with text in an accepted source language.", ">>spa<< This is the second sentence." ] model_name = "Helsinki-NLP/opus-mt-tc-bible-big-gmq-deu_eng_fra_por_spa" tokenizer = MarianTokenizer.from_pretrained(model_name) model = MarianMTModel.from_pretrained(model_name) translated = model.generate(**tokenizer(src_text, return_tensors="pt", padding=True)) for t in translated: print( tokenizer.decode(t, skip_special_tokens=True) ) ``` You can also use OPUS-MT models with the transformers pipelines, for example: ```python from transformers import pipeline pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-bible-big-gmq-deu_eng_fra_por_spa") print(pipe(">>deu<< Replace this with text in an accepted source language.")) ``` ## Training - **Data**: opusTCv20230926max50+bt+jhubc ([source](https://github.com/Helsinki-NLP/Tatoeba-Challenge)) - **Pre-processing**: SentencePiece (spm32k,spm32k) - **Model Type:** transformer-big - **Original MarianNMT Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/gmq-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip) - **Training Scripts**: [GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train) ## Evaluation * [Model scores at the OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/gmq-deu%2Beng%2Bfra%2Bpor%2Bspa/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30) * test set translations: 
[opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/gmq-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt) * test set scores: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/gmq-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt) * benchmark results: [benchmark_results.txt](benchmark_results.txt) * benchmark output: [benchmark_translations.zip](benchmark_translations.zip) | langpair | testset | chr-F | BLEU | #sent | #words | |----------|---------|-------|-------|-------|--------| | dan-deu | tatoeba-test-v2021-08-07 | 0.74460 | 56.7 | 9998 | 76055 | | dan-eng | tatoeba-test-v2021-08-07 | 0.77233 | 64.3 | 10795 | 79684 | | dan-fra | tatoeba-test-v2021-08-07 | 0.76425 | 60.8 | 1731 | 11882 | | dan-por | tatoeba-test-v2021-08-07 | 0.77248 | 60.0 | 873 | 5360 | | dan-spa | tatoeba-test-v2021-08-07 | 0.72567 | 54.9 | 5000 | 35528 | | fao-eng | tatoeba-test-v2021-08-07 | 0.54571 | 39.6 | 294 | 1984 | | isl-deu | tatoeba-test-v2021-08-07 | 0.68535 | 51.4 | 969 | 6279 | | isl-eng | tatoeba-test-v2021-08-07 | 0.67066 | 51.7 | 2503 | 19788 | | isl-spa | tatoeba-test-v2021-08-07 | 0.65659 | 48.5 | 238 | 1229 | | nno-eng | tatoeba-test-v2021-08-07 | 0.69415 | 55.5 | 460 | 3524 | | nob-deu | tatoeba-test-v2021-08-07 | 0.69862 | 50.5 | 3525 | 33592 | | nob-eng | tatoeba-test-v2021-08-07 | 0.72912 | 59.2 | 4539 | 36823 | | nob-fra | tatoeba-test-v2021-08-07 | 0.71392 | 52.5 | 323 | 2269 | | nob-spa | tatoeba-test-v2021-08-07 | 0.73300 | 55.1 | 885 | 6866 | | nor-deu | tatoeba-test-v2021-08-07 | 0.69923 | 50.7 | 3651 | 34575 | | nor-eng | tatoeba-test-v2021-08-07 | 0.72587 | 58.8 | 5000 | 40355 | | nor-fra | tatoeba-test-v2021-08-07 | 0.73052 | 55.1 | 477 | 3213 | | nor-por | tatoeba-test-v2021-08-07 | 0.67948 | 45.4 | 481 | 4182 | | nor-spa | tatoeba-test-v2021-08-07 | 0.73320 | 55.3 
| 960 | 7311 | | swe-deu | tatoeba-test-v2021-08-07 | 0.71816 | 55.4 | 3410 | 23494 | | swe-eng | tatoeba-test-v2021-08-07 | 0.76648 | 64.8 | 10362 | 68513 | | swe-fra | tatoeba-test-v2021-08-07 | 0.72847 | 57.4 | 1407 | 9580 | | swe-por | tatoeba-test-v2021-08-07 | 0.70554 | 50.3 | 320 | 2032 | | swe-spa | tatoeba-test-v2021-08-07 | 0.70926 | 54.3 | 1351 | 8235 | | dan-eng | flores101-devtest | 0.71193 | 47.6 | 1012 | 24721 | | dan-fra | flores101-devtest | 0.63349 | 38.1 | 1012 | 28343 | | dan-por | flores101-devtest | 0.62063 | 36.2 | 1012 | 26519 | | dan-spa | flores101-devtest | 0.52557 | 24.2 | 1012 | 29199 | | isl-deu | flores101-devtest | 0.50581 | 22.2 | 1012 | 25094 | | isl-eng | flores101-devtest | 0.57294 | 31.6 | 1012 | 24721 | | isl-por | flores101-devtest | 0.52192 | 25.8 | 1012 | 26519 | | isl-spa | flores101-devtest | 0.46364 | 18.5 | 1012 | 29199 | | nob-eng | flores101-devtest | 0.67120 | 42.6 | 1012 | 24721 | | nob-fra | flores101-devtest | 0.60289 | 33.9 | 1012 | 28343 | | nob-spa | flores101-devtest | 0.50848 | 21.9 | 1012 | 29199 | | swe-deu | flores101-devtest | 0.60306 | 32.2 | 1012 | 25094 | | swe-eng | flores101-devtest | 0.70404 | 47.9 | 1012 | 24721 | | swe-por | flores101-devtest | 0.61418 | 35.7 | 1012 | 26519 | | dan-deu | flores200-devtest | 0.60897 | 32.3 | 1012 | 25094 | | dan-eng | flores200-devtest | 0.71641 | 48.2 | 1012 | 24721 | | dan-fra | flores200-devtest | 0.63777 | 38.9 | 1012 | 28343 | | dan-por | flores200-devtest | 0.62302 | 36.7 | 1012 | 26519 | | dan-spa | flores200-devtest | 0.52803 | 24.4 | 1012 | 29199 | | fao-deu | flores200-devtest | 0.41184 | 16.0 | 1012 | 25094 | | fao-eng | flores200-devtest | 0.43308 | 21.2 | 1012 | 24721 | | fao-por | flores200-devtest | 0.42649 | 19.0 | 1012 | 26519 | | isl-deu | flores200-devtest | 0.51165 | 22.7 | 1012 | 25094 | | isl-eng | flores200-devtest | 0.57745 | 32.2 | 1012 | 24721 | | isl-fra | flores200-devtest | 0.54210 | 27.6 | 1012 | 28343 | | isl-por | flores200-devtest | 
0.52479 | 26.1 | 1012 | 26519 | | isl-spa | flores200-devtest | 0.46837 | 19.2 | 1012 | 29199 | | nno-deu | flores200-devtest | 0.58054 | 29.2 | 1012 | 25094 | | nno-eng | flores200-devtest | 0.69114 | 45.0 | 1012 | 24721 | | nno-fra | flores200-devtest | 0.61334 | 36.0 | 1012 | 28343 | | nno-por | flores200-devtest | 0.60055 | 34.1 | 1012 | 26519 | | nno-spa | flores200-devtest | 0.51190 | 22.8 | 1012 | 29199 | | nob-deu | flores200-devtest | 0.57023 | 27.6 | 1012 | 25094 | | nob-eng | flores200-devtest | 0.67540 | 43.1 | 1012 | 24721 | | nob-fra | flores200-devtest | 0.60568 | 34.2 | 1012 | 28343 | | nob-por | flores200-devtest | 0.59466 | 32.8 | 1012 | 26519 | | nob-spa | flores200-devtest | 0.51138 | 22.4 | 1012 | 29199 | | swe-deu | flores200-devtest | 0.60630 | 32.6 | 1012 | 25094 | | swe-eng | flores200-devtest | 0.70584 | 48.1 | 1012 | 24721 | | swe-fra | flores200-devtest | 0.63608 | 39.1 | 1012 | 28343 | | swe-por | flores200-devtest | 0.62046 | 36.4 | 1012 | 26519 | | swe-spa | flores200-devtest | 0.52328 | 23.9 | 1012 | 29199 | | isl-eng | newstest2021 | 0.56364 | 32.4 | 1000 | 22529 | | dan-deu | ntrex128 | 0.54229 | 25.3 | 1997 | 48761 | | dan-eng | ntrex128 | 0.63083 | 38.7 | 1997 | 47673 | | dan-fra | ntrex128 | 0.54088 | 26.2 | 1997 | 53481 | | dan-por | ntrex128 | 0.53626 | 27.0 | 1997 | 51631 | | dan-spa | ntrex128 | 0.56217 | 30.8 | 1997 | 54107 | | fao-deu | ntrex128 | 0.41701 | 16.4 | 1997 | 48761 | | fao-eng | ntrex128 | 0.47105 | 25.3 | 1997 | 47673 | | fao-fra | ntrex128 | 0.40070 | 16.3 | 1997 | 53481 | | fao-por | ntrex128 | 0.42005 | 18.0 | 1997 | 51631 | | fao-spa | ntrex128 | 0.44085 | 20.5 | 1997 | 54107 | | isl-deu | ntrex128 | 0.49932 | 20.5 | 1997 | 48761 | | isl-eng | ntrex128 | 0.56856 | 29.7 | 1997 | 47673 | | isl-fra | ntrex128 | 0.51998 | 24.6 | 1997 | 53481 | | isl-por | ntrex128 | 0.49903 | 21.7 | 1997 | 51631 | | isl-spa | ntrex128 | 0.53171 | 27.1 | 1997 | 54107 | | nno-deu | ntrex128 | 0.53000 | 24.4 | 1997 | 48761 | | 
nno-eng | ntrex128 | 0.65866 | 42.9 | 1997 | 47673 |
| nno-fra | ntrex128 | 0.54339 | 27.5 | 1997 | 53481 |
| nno-por | ntrex128 | 0.53242 | 26.3 | 1997 | 51631 |
| nno-spa | ntrex128 | 0.55889 | 30.4 | 1997 | 54107 |
| nob-deu | ntrex128 | 0.55549 | 26.8 | 1997 | 48761 |
| nob-eng | ntrex128 | 0.65580 | 40.9 | 1997 | 47673 |
| nob-fra | ntrex128 | 0.56187 | 29.2 | 1997 | 53481 |
| nob-por | ntrex128 | 0.54392 | 26.6 | 1997 | 51631 |
| nob-spa | ntrex128 | 0.57998 | 32.6 | 1997 | 54107 |
| swe-deu | ntrex128 | 0.55549 | 26.7 | 1997 | 48761 |
| swe-eng | ntrex128 | 0.66348 | 42.2 | 1997 | 47673 |
| swe-fra | ntrex128 | 0.56310 | 29.0 | 1997 | 53481 |
| swe-por | ntrex128 | 0.54965 | 27.8 | 1997 | 51631 |
| swe-spa | ntrex128 | 0.58035 | 32.8 | 1997 | 54107 |

## Citation Information

* Publications: [Democratizing neural machine translation with OPUS-MT](https://doi.org/10.1007/s10579-023-09704-w), [OPUS-MT – Building open translation services for the World](https://aclanthology.org/2020.eamt-1.61/), and [The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual MT](https://aclanthology.org/2020.wmt-1.139/). Please cite them if you use this model.

```bibtex
@article{tiedemann2023democratizing,
  title={Democratizing neural machine translation with {OPUS-MT}},
  author={Tiedemann, J{\"o}rg and Aulamo, Mikko and Bakshandaeva, Daria and Boggia, Michele and Gr{\"o}nroos, Stig-Arne and Nieminen, Tommi and Raganato, Alessandro and Scherrer, Yves and Vazquez, Raul and Virpioja, Sami},
  journal={Language Resources and Evaluation},
  volume={58},
  pages={713--755},
  year={2023},
  publisher={Springer Nature},
  issn={1574-0218},
  doi={10.1007/s10579-023-09704-w}
}

@inproceedings{tiedemann-thottingal-2020-opus,
    title = "{OPUS}-{MT} {--} Building open translation services for the World",
    author = {Tiedemann, J{\"o}rg and Thottingal, Santhosh},
    booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
    month = nov,
    year = "2020",
    address = "Lisboa, Portugal",
    publisher = "European Association for Machine Translation",
    url = "https://aclanthology.org/2020.eamt-1.61",
    pages = "479--480",
}

@inproceedings{tiedemann-2020-tatoeba,
    title = "The Tatoeba Translation Challenge {--} Realistic Data Sets for Low Resource and Multilingual {MT}",
    author = {Tiedemann, J{\"o}rg},
    booktitle = "Proceedings of the Fifth Conference on Machine Translation",
    month = nov,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.wmt-1.139",
    pages = "1174--1182",
}
```

## Acknowledgements

The work is supported by the [HPLT project](https://hplt-project.org/), funded by the European Union’s Horizon Europe research and innovation programme under grant agreement No 101070350. We are also grateful for the generous computational resources and IT infrastructure provided by [CSC – IT Center for Science](https://www.csc.fi/), Finland, and the [EuroHPC supercomputer LUMI](https://www.lumi-supercomputer.eu/).

## Model conversion info

* transformers version: 4.45.1
* OPUS-MT git hash: 0882077
* port time: Tue Oct 8 11:11:37 EEST 2024
* port machine: LM0-400-22516.local