OpenLLM-Ro
/

RoLlama2-7b-Base

PyTorch

Romanian

llama

Eval Results

Model card Files Files and versions Community

mihaimasala committed 8 days ago

Commit

f2b05bc

•

1 Parent(s): acea762

Update README.md

Browse files

Files changed (1) hide show

README.md +445 -444

README.md CHANGED Viewed

@@ -3,452 +3,453 @@ license: llama2
 language:
 - ro
 base_model: meta-llama/Llama-2-7b-hf
-model-index:
-- name: OpenLLM-Ro/RoLlama2-7b-Base-2024-05-14
-  results:
-  - task:
-      type: text-generation
-    dataset:
-      name: Romanian_Academic_Benchmarks
-      type: Romanian_Academic_Benchmarks
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 38.03
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_arc_challenge
-      type: OpenLLM-Ro/ro_arc_challenge
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 37.95
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_mmlu
-      type: OpenLLM-Ro/ro_mmlu
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 27.22
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_winogrande
-      type: OpenLLM-Ro/ro_winogrande
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 59.29
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_hellaswag
-      type: OpenLLM-Ro/ro_hellaswag
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 57.22
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_gsm8k
-      type: OpenLLM-Ro/ro_gsm8k
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 2.53
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_truthfulqa
-      type: OpenLLM-Ro/ro_truthfulqa
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 44
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_binary
-      type: LaRoSeDa_binary
-    metrics:
-    - name: Average macro-f1
-      type: macro-f1
-      value: 83.25
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_multiclass
-      type: LaRoSeDa_multiclass
-    metrics:
-    - name: Average macro-f1
-      type: macro-f1
-      value: 61.04
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_binary_finetuned
-      type: LaRoSeDa_binary_finetuned
-    metrics:
-    - name: Average macro-f1
-      type: macro-f1
-      value: 98.97
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_multiclass_finetuned
-      type: LaRoSeDa_multiclass_finetuned
-    metrics:
-    - name: Average macro-f1
-      type: macro-f1
-      value: 87.72
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_EN-RO
-      type: WMT_EN-RO
-    metrics:
-    - name: Average bleu
-      type: bleu
-      value: 10.01
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_RO-EN
-      type: WMT_RO-EN
-    metrics:
-    - name: Average bleu
-      type: bleu
-      value: 13.03
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_EN-RO_finetuned
-      type: WMT_EN-RO_finetuned
-    metrics:
-    - name: Average bleu
-      type: bleu
-      value: 27.85
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_RO-EN_finetuned
-      type: WMT_RO-EN_finetuned
-    metrics:
-    - name: Average bleu
-      type: bleu
-      value: 39.3
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD
-      type: XQuAD
-    metrics:
-    - name: Average exact_match
-      type: exact_match
-      value: 30.15
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD
-      type: XQuAD
-    metrics:
-    - name: Average f1
-      type: f1
-      value: 47.03
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD_finetuned
-      type: XQuAD_finetuned
-    metrics:
-    - name: Average exact_match
-      type: exact_match
-      value: 67.06
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD_finetuned
-      type: XQuAD_finetuned
-    metrics:
-    - name: Average f1
-      type: f1
-      value: 79.96
-  - task:
-      type: text-generation
-    dataset:
-      name: STS
-      type: STS
-    metrics:
-    - name: Average spearman
-      type: spearman
-      value: 7.89
-  - task:
-      type: text-generation
-    dataset:
-      name: STS
-      type: STS
-    metrics:
-    - name: Average pearson
-      type: pearson
-      value: 7.98
-  - task:
-      type: text-generation
-    dataset:
-      name: STS_finetuned
-      type: STS_finetuned
-    metrics:
-    - name: Average spearman
-      type: spearman
-      value: 71.75
-  - task:
-      type: text-generation
-    dataset:
-      name: STS_finetuned
-      type: STS_finetuned
-    metrics:
-    - name: Average pearson
-      type: pearson
-      value: 71.99
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_arc_challenge
-      type: OpenLLM-Ro/ro_arc_challenge
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 35.56
-    - name: 1-shot
-      type: accuracy
-      value: 36.42
-    - name: 3-shot
-      type: accuracy
-      value: 38.56
-    - name: 5-shot
-      type: accuracy
-      value: 38.39
-    - name: 10-shot
-      type: accuracy
-      value: 39.07
-    - name: 25-shot
-      type: accuracy
-      value: 39.67
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_mmlu
-      type: OpenLLM-Ro/ro_mmlu
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 25.82
-    - name: 1-shot
-      type: accuracy
-      value: 25.48
-    - name: 3-shot
-      type: accuracy
-      value: 27.61
-    - name: 5-shot
-      type: accuracy
-      value: 29.96
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_winogrande
-      type: OpenLLM-Ro/ro_winogrande
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 58.72
-    - name: 1-shot
-      type: accuracy
-      value: 58.88
-    - name: 3-shot
-      type: accuracy
-      value: 60.38
-    - name: 5-shot
-      type: accuracy
-      value: 59.19
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_hellaswag
-      type: OpenLLM-Ro/ro_hellaswag
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 55.85
-    - name: 1-shot
-      type: accuracy
-      value: 57.06
-    - name: 3-shot
-      type: accuracy
-      value: 57.52
-    - name: 5-shot
-      type: accuracy
-      value: 57.89
-    - name: 10-shot
-      type: accuracy
-      value: 57.79
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_gsm8k
-      type: OpenLLM-Ro/ro_gsm8k
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 0
-    - name: 1-shot
-      type: accuracy
-      value: 2.96
-    - name: 3-shot
-      type: accuracy
-      value: 4.62
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_binary
-      type: LaRoSeDa_binary
-    metrics:
-    - name: 0-shot
-      type: macro-f1
-      value: 42.78
-    - name: 1-shot
-      type: macro-f1
-      value: 98
-    - name: 3-shot
-      type: macro-f1
-      value: 95.13
-    - name: 5-shot
-      type: macro-f1
-      value: 97.07
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_multiclass
-      type: LaRoSeDa_multiclass
-    metrics:
-    - name: 0-shot
-      type: macro-f1
-      value: 46.41
-    - name: 1-shot
-      type: macro-f1
-      value: 67.36
-    - name: 3-shot
-      type: macro-f1
-      value: 65.16
-    - name: 5-shot
-      type: macro-f1
-      value: 65.23
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_EN-RO
-      type: WMT_EN-RO
-    metrics:
-    - name: 0-shot
-      type: bleu
-      value: 4.45
-    - name: 1-shot
-      type: bleu
-      value: 8.61
-    - name: 3-shot
-      type: bleu
-      value: 12.25
-    - name: 5-shot
-      type: bleu
-      value: 14.73
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_RO-EN
-      type: WMT_RO-EN
-    metrics:
-    - name: 0-shot
-      type: bleu
-      value: 1.29
-    - name: 1-shot
-      type: bleu
-      value: 10.78
-    - name: 3-shot
-      type: bleu
-      value: 16.82
-    - name: 5-shot
-      type: bleu
-      value: 23.24
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD_EM
-      type: XQuAD_EM
-    metrics:
-    - name: 0-shot
-      type: exact_match
-      value: 5.29
-    - name: 1-shot
-      type: exact_match
-      value: 33.95
-    - name: 3-shot
-      type: exact_match
-      value: 39.24
-    - name: 5-shot
-      type: exact_match
-      value: 42.1
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD_F1
-      type: XQuAD_F1
-    metrics:
-    - name: 0-shot
-      type: f1
-      value: 16.17
-    - name: 1-shot
-      type: f1
-      value: 51.84
-    - name: 3-shot
-      type: f1
-      value: 58.82
-    - name: 5-shot
-      type: f1
-      value: 61.29
-  - task:
-      type: text-generation
-    dataset:
-      name: STS
-      type: STS
-    metrics:
-    - name: 0-shot
-      type: spearman
-      value: -1.74
-    - name: 1-shot
-      type: spearman
-      value: 15.47
-    - name: 3-shot
-      type: spearman
-      value: 9.93
-  - task:
-      type: text-generation
-    dataset:
-      name: STS
-      type: STS
-    metrics:
-    - name: 0-shot
-      type: pearson
-      value: -1.4
-    - name: 1-shot
-      type: pearson
-      value: 15
-    - name: 3-shot
-      type: pearson
-      value: 10.33
 datasets:
 - uonlp/CulturaX
 ---
 # Model Card for Model ID

 language:
 - ro
 base_model: meta-llama/Llama-2-7b-hf
 datasets:
 - uonlp/CulturaX
+model-index:
+    - name: OpenLLM-Ro/RoLlama2-7b-Base-2024-05-14
+      results:
+        - task:
+            type: text-generation
+          dataset:
+            name: Romanian_Academic_Benchmarks
+            type: Romanian_Academic_Benchmarks
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 38.03
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_arc_challenge
+            type: OpenLLM-Ro/ro_arc_challenge
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 37.95
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_mmlu
+            type: OpenLLM-Ro/ro_mmlu
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 27.22
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_winogrande
+            type: OpenLLM-Ro/ro_winogrande
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 59.29
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_hellaswag
+            type: OpenLLM-Ro/ro_hellaswag
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 57.22
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_gsm8k
+            type: OpenLLM-Ro/ro_gsm8k
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 2.53
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_truthfulqa
+            type: OpenLLM-Ro/ro_truthfulqa
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 44.00
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_binary
+            type: LaRoSeDa_binary
+          metrics:
+            - name: Average macro-f1
+              type: macro-f1
+              value: 83.25
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_multiclass
+            type: LaRoSeDa_multiclass
+          metrics:
+            - name: Average macro-f1
+              type: macro-f1
+              value: 61.04
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_binary_finetuned
+            type: LaRoSeDa_binary_finetuned
+          metrics:
+            - name: Average macro-f1
+              type: macro-f1
+              value: 98.97
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_multiclass_finetuned
+            type: LaRoSeDa_multiclass_finetuned
+          metrics:
+            - name: Average macro-f1
+              type: macro-f1
+              value: 87.72
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_EN-RO
+            type: WMT_EN-RO
+          metrics:
+            - name: Average bleu
+              type: bleu
+              value: 10.01
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_RO-EN
+            type: WMT_RO-EN
+          metrics:
+            - name: Average bleu
+              type: bleu
+              value: 13.03
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_EN-RO_finetuned
+            type: WMT_EN-RO_finetuned
+          metrics:
+            - name: Average bleu
+              type: bleu
+              value: 27.85
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_RO-EN_finetuned
+            type: WMT_RO-EN_finetuned
+          metrics:
+            - name: Average bleu
+              type: bleu
+              value: 39.30
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD
+            type: XQuAD
+          metrics:
+            - name: Average exact_match
+              type: exact_match
+              value: 30.15
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD
+            type: XQuAD
+          metrics:
+            - name: Average f1
+              type: f1
+              value: 47.03
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD_finetuned
+            type: XQuAD_finetuned
+          metrics:
+            - name: Average exact_match
+              type: exact_match
+              value: 67.06
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD_finetuned
+            type: XQuAD_finetuned
+          metrics:
+            - name: Average f1
+              type: f1
+              value: 79.96
+        - task:
+            type: text-generation
+          dataset:
+            name: STS
+            type: STS
+          metrics:
+            - name: Average spearman
+              type: spearman
+              value: 7.89
+        - task:
+            type: text-generation
+          dataset:
+            name: STS
+            type: STS
+          metrics:
+            - name: Average pearson
+              type: pearson
+              value: 7.98
+        - task:
+            type: text-generation
+          dataset:
+            name: STS_finetuned
+            type: STS_finetuned
+          metrics:
+            - name: Average spearman
+              type: spearman
+              value: 71.75
+        - task:
+            type: text-generation
+          dataset:
+            name: STS_finetuned
+            type: STS_finetuned
+          metrics:
+            - name: Average pearson
+              type: pearson
+              value: 71.99
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_arc_challenge
+            type: OpenLLM-Ro/ro_arc_challenge
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 35.56
+            - name: 1-shot
+              type: accuracy
+              value: 36.42
+            - name: 3-shot
+              type: accuracy
+              value: 38.56
+            - name: 5-shot
+              type: accuracy
+              value: 38.39
+            - name: 10-shot
+              type: accuracy
+              value: 39.07
+            - name: 25-shot
+              type: accuracy
+              value: 39.67
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_mmlu
+            type: OpenLLM-Ro/ro_mmlu
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 25.82
+            - name: 1-shot
+              type: accuracy
+              value: 25.48
+            - name: 3-shot
+              type: accuracy
+              value: 27.61
+            - name: 5-shot
+              type: accuracy
+              value: 29.96
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_winogrande
+            type: OpenLLM-Ro/ro_winogrande
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 58.72
+            - name: 1-shot
+              type: accuracy
+              value: 58.88
+            - name: 3-shot
+              type: accuracy
+              value: 60.38
+            - name: 5-shot
+              type: accuracy
+              value: 59.19
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_hellaswag
+            type: OpenLLM-Ro/ro_hellaswag
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 55.85
+            - name: 1-shot
+              type: accuracy
+              value: 57.06
+            - name: 3-shot
+              type: accuracy
+              value: 57.52
+            - name: 5-shot
+              type: accuracy
+              value: 57.89
+            - name: 10-shot
+              type: accuracy
+              value: 57.79
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_gsm8k
+            type: OpenLLM-Ro/ro_gsm8k
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 0.00
+            - name: 1-shot
+              type: accuracy
+              value: 2.96
+            - name: 3-shot
+              type: accuracy
+              value: 4.62
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_binary
+            type: LaRoSeDa_binary
+          metrics:
+            - name: 0-shot
+              type: macro-f1
+              value: 42.78
+            - name: 1-shot
+              type: macro-f1
+              value: 98.00
+            - name: 3-shot
+              type: macro-f1
+              value: 95.13
+            - name: 5-shot
+              type: macro-f1
+              value: 97.07
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_multiclass
+            type: LaRoSeDa_multiclass
+          metrics:
+            - name: 0-shot
+              type: macro-f1
+              value: 46.41
+            - name: 1-shot
+              type: macro-f1
+              value: 67.36
+            - name: 3-shot
+              type: macro-f1
+              value: 65.16
+            - name: 5-shot
+              type: macro-f1
+              value: 65.23
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_EN-RO
+            type: WMT_EN-RO
+          metrics:
+            - name: 0-shot
+              type: bleu
+              value: 4.45
+            - name: 1-shot
+              type: bleu
+              value: 8.61
+            - name: 3-shot
+              type: bleu
+              value: 12.25
+            - name: 5-shot
+              type: bleu
+              value: 14.73
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_RO-EN
+            type: WMT_RO-EN
+          metrics:
+            - name: 0-shot
+              type: bleu
+              value: 1.29
+            - name: 1-shot
+              type: bleu
+              value: 10.78
+            - name: 3-shot
+              type: bleu
+              value: 16.82
+            - name: 5-shot
+              type: bleu
+              value: 23.24
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD_EM
+            type: XQuAD_EM
+          metrics:
+            - name: 0-shot
+              type: exact_match
+              value: 5.29
+            - name: 1-shot
+              type: exact_match
+              value: 33.95
+            - name: 3-shot
+              type: exact_match
+              value: 39.24
+            - name: 5-shot
+              type: exact_match
+              value: 42.10
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD_F1
+            type: XQuAD_F1
+          metrics:
+            - name: 0-shot
+              type: f1
+              value: 16.17
+            - name: 1-shot
+              type: f1
+              value: 51.84
+            - name: 3-shot
+              type: f1
+              value: 58.82
+            - name: 5-shot
+              type: f1
+              value: 61.29
+        - task:
+            type: text-generation
+          dataset:
+            name: STS_Spearman
+            type: STS_Spearman
+          metrics:
+            - name: 1-shot
+              type: spearman
+              value: -1.74
+            - name: 3-shot
+              type: spearman
+              value: 15.47
+            - name: 5-shot
+              type: spearman
+              value: 9.93
+        - task:
+            type: text-generation
+          dataset:
+            name: STS_Pearson
+            type: STS_Pearson
+          metrics:
+            - name: 1-shot
+              type: pearson
+              value: -1.40
+            - name: 3-shot
+              type: pearson
+              value: 15.00
+            - name: 5-shot
+              type: pearson
+              value: 10.33
 ---
 # Model Card for Model ID