Weyaxi committed on
Commit
0954803
1 Parent(s): a69ad7a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +72 -51
README.md CHANGED
@@ -1,98 +1,119 @@
1
  ---
2
  license: apache-2.0
3
  model-index:
4
- - name: metadatatest
5
  results:
 
6
  - task:
7
  type: text-generation
 
8
  dataset:
9
- name: "ai2_arc"
10
- type: "ai2_arc"
 
 
 
 
11
  metrics:
12
- - name: AI2 Reasoning Challenge (25-Shot)
13
- type: AI2 Reasoning Challenge (25-Shot)
14
- value: 54.3921
15
  source:
16
  name: Open LLM Leaderboard
17
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
18
-
19
- - task:
20
- type: text-generation
21
- dataset:
22
- name: "hellaswag"
23
- type: "hellaswag"
24
- metrics:
25
- - name: HellaSwag (10-shot)
26
- type: HellaSwag (10-shot)
27
- value: 54.3921
28
- source:
29
- name: Open LLM Leaderboard
30
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
31
-
32
  - task:
33
  type: text-generation
 
34
  dataset:
35
- name: "mmlu"
36
- type: "mmlu"
 
 
 
37
  metrics:
38
- - name: MMLU (5-Shot)
39
- type: MMLU (5-Shot)
40
- value: 54.3921
41
  source:
42
  name: Open LLM Leaderboard
43
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
44
 
 
45
  - task:
46
  type: text-generation
 
47
  dataset:
48
- name: "truthful_qa"
49
- type: "truthful_qa"
 
 
 
 
50
  metrics:
51
- - name: TruthfulQA (0-shot)
52
- type: TruthfulQA (0-shot)
53
- value: 54.3921
54
  source:
55
  name: Open LLM Leaderboard
56
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
57
 
 
58
  - task:
59
  type: text-generation
 
60
  dataset:
61
- name: "winogrande"
62
- type: "winogrande"
 
 
 
 
63
  metrics:
64
- - name: Winogrande (5-shot)
65
- type: Winogrande (5-shot)
66
- value: 49
67
  source:
68
  name: Open LLM Leaderboard
69
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
70
 
 
71
  - task:
72
  type: text-generation
 
73
  dataset:
74
- name: "gsm8k"
75
- type: "gsm8k"
 
 
 
 
76
  metrics:
77
- - name: GSM8k (5-shot)
78
- type: GSM8k (5-shot)
79
- value: 5
80
  source:
81
  name: Open LLM Leaderboard
82
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
83
 
 
84
  - task:
85
  type: text-generation
 
86
  dataset:
87
- name: "drop"
88
- type: "drop"
 
 
 
 
89
  metrics:
90
- - name: DROP (3-shot)
91
- type: DROP (3-shot)
92
- value: 9
93
  source:
94
  name: Open LLM Leaderboard
95
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
96
 
97
 
98
  ---
 
1
  ---
2
  license: apache-2.0
3
  model-index:
4
+ - name: metadata-test
5
  results:
6
+ # AI2 Reasoning Challenge (25-Shot)
7
  - task:
8
  type: text-generation
9
+ name: Text Generation
10
  dataset:
11
+ name: AI2 Reasoning Challenge (25-Shot)
12
+ type: ai2_arc
13
+ config: ARC-Challenge
14
+ split: test
15
+ args:
16
+ num_few_shot: 25
17
  metrics:
18
+ - type: acc_norm
19
+ name: normalized accuracy
20
+ value: 0.6203071672354948
21
  source:
22
  name: Open LLM Leaderboard
23
+ url: https://huggingface.co/datasets/open-llm-leaderboard/details_
24
+
25
+ # HellaSwag (10-shot)
 
 
 
 
 
 
 
 
 
 
 
 
26
  - task:
27
  type: text-generation
28
+ name: Text Generation
29
  dataset:
30
+ name: HellaSwag (10-Shot)
31
+ type: hellaswag
32
+ split: validation
33
+ args:
34
+ num_few_shot: 10
35
  metrics:
36
+ - type: acc_norm
37
+ name: normalized accuracy
38
+ value: 0.8435570603465445
39
  source:
40
  name: Open LLM Leaderboard
41
+ url: https://huggingface.co/datasets/open-llm-leaderboard/details_
42
 
43
+ # TruthfulQA (0-shot)
44
  - task:
45
  type: text-generation
46
+ name: Text Generation
47
  dataset:
48
+ name: TruthfulQA (0-shot)
49
+ type: truthful_qa
50
+ config: multiple_choice
51
+ split: validation
52
+ args:
53
+ num_few_shot: 0
54
  metrics:
55
+ - type: mc2
56
+ value: 0.5744916942762855
 
57
  source:
58
  name: Open LLM Leaderboard
59
+ url: https://huggingface.co/datasets/open-llm-leaderboard/details_
60
 
61
+ # GSM8k (5-shot)
62
  - task:
63
  type: text-generation
64
+ name: Text Generation
65
  dataset:
66
+ name: GSM8k (5-shot)
67
+ type: gsm8k
68
+ config: main
69
+ split: test
70
+ args:
71
+ num_few_shot: 5
72
  metrics:
73
+ - type: acc
74
+ name: accuracy
75
+ value: 0.12736921910538287
76
  source:
77
  name: Open LLM Leaderboard
78
+ url: https://huggingface.co/datasets/open-llm-leaderboard/details_
79
 
80
+ # MMLU (5-Shot)
81
  - task:
82
  type: text-generation
83
+ name: Text Generation
84
  dataset:
85
+ name: MMLU (5-Shot)
86
+ type: cais/mmlu
87
+ config: all
88
+ split: test
89
+ args:
90
+ num_few_shot: 5
91
  metrics:
92
+ - type: acc
93
+ name: accuracy
94
+ value: 0.6107
95
  source:
96
  name: Open LLM Leaderboard
97
+ url: https://huggingface.co/datasets/open-llm-leaderboard/details_
98
 
99
+ # Winogrande (5-shot)
100
  - task:
101
  type: text-generation
102
+ name: Text Generation
103
  dataset:
104
+ name: Winogrande (5-shot)
105
+ type: winogrande
106
+ config: winogrande_xl
107
+ split: validation
108
+ args:
109
+ num_few_shot: 5
110
  metrics:
111
+ - type: acc
112
+ name: accuracy
113
+ value: 0.7774269928966061
114
  source:
115
  name: Open LLM Leaderboard
116
+ url: https://huggingface.co/datasets/open-llm-leaderboard/details_
117
 
118
 
119
  ---