Taishi-N324 committed on
Commit
5ebac3e
·
verified ·
1 Parent(s): 001a0c0

Upload aggregated_result.json

Browse files
Files changed (1) hide show
  1. aggregated_result.json +122 -0
aggregated_result.json ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "/gs/bs/tgh-NII-LLM/fp32_to_hf/upcycle-Mixtral-8x152M-torch_rand_002_iter_0477000_main_zero3/lr_2e-4-minlr_2e-5_warmup_2000_seq_4096/iter_0238419",
3
+ "result": {
4
+ "XLSUM_ja_1shot": -1.0,
5
+ "MATH (mgsm_ja)": -1.0,
6
+ "wmt20_en_ja_bleu": -1.0,
7
+ "wmt20_ja_en_bleu": -1.0,
8
+ "MC": -1.0,
9
+ "NLI": -1.0,
10
+ "QA": -1.0,
11
+ "RC": -1.0,
12
+ "jamp (NLI)": -1.0,
13
+ "janli (NLI)": -1.0,
14
+ "jcommonsenseqa": -1.0,
15
+ "jemhopqa": -1.0,
16
+ "jnli": -1.0,
17
+ "jsem": -1.0,
18
+ "jsick (NLI)": -1.0,
19
+ "jsquad": -1.0,
20
+ "jsts_pearson": -1.0,
21
+ "jsts_spearman": -1.0,
22
+ "niilc": -1.0,
23
+ "jmmlu": -1.0,
24
+ "jmmlu_social_sciences": -1.0,
25
+ "jmmlu_humanities": -1.0,
26
+ "jmmlu_stem": -1.0,
27
+ "jmmlu_other": -1.0,
28
+ "jhumaneval@1": -1.0,
29
+ "jhumaneval@10": -1.0,
30
+ "jhumaneval_answer@10": -1.0,
31
+ "MT-Bench (ALL)": -1.0,
32
+ "writing": -1.0,
33
+ "roleplay": -1.0,
34
+ "reasoning": -1.0,
35
+ "math": -1.0,
36
+ "coding": -1.0,
37
+ "extraction": -1.0,
38
+ "stem": -1.0,
39
+ "humanities": -1.0,
40
+ "gsm8k": -1.0,
41
+ "squad2": -1.0,
42
+ "triviaqa": -1.0,
43
+ "hellaswag": -1.0,
44
+ "openbookqa": -1.0,
45
+ "xwinograd_en": -1.0,
46
+ "bbh_cot": -1.0,
47
+ "mmlu": -1.0,
48
+ "mmlu_social_sciences": -1.0,
49
+ "mmlu_humanities": -1.0,
50
+ "mmlu_stem": -1.0,
51
+ "mmlu_other": -1.0,
52
+ "humaneval@1": -1.0,
53
+ "humaneval@10": -1.0,
54
+ "humaneval_answer@10": -1.0,
55
+ "jhumaneval-unstripped@1": -1.0,
56
+ "jhumaneval-unstripped@10": -1.0,
57
+ "jhumaneval-unstripped_answer@10": -1.0,
58
+ "humaneval-unstripped@1": -1.0,
59
+ "humaneval-unstripped@10": -1.0,
60
+ "humaneval-unstripped_answer@10": -1.0
61
+ },
62
+ "overall": "-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0",
63
+ "tasks": [
64
+ "XLSUM_ja_1shot",
65
+ "MATH (mgsm_ja)",
66
+ "wmt20_en_ja_bleu",
67
+ "wmt20_ja_en_bleu",
68
+ "MC",
69
+ "NLI",
70
+ "QA",
71
+ "RC",
72
+ "jamp (NLI)",
73
+ "janli (NLI)",
74
+ "jcommonsenseqa",
75
+ "jemhopqa",
76
+ "jnli",
77
+ "jsem",
78
+ "jsick (NLI)",
79
+ "jsquad",
80
+ "jsts_pearson",
81
+ "jsts_spearman",
82
+ "niilc",
83
+ "jmmlu",
84
+ "jmmlu_social_sciences",
85
+ "jmmlu_humanities",
86
+ "jmmlu_stem",
87
+ "jmmlu_other",
88
+ "jhumaneval@1",
89
+ "jhumaneval@10",
90
+ "jhumaneval_answer@10",
91
+ "MT-Bench (ALL)",
92
+ "writing",
93
+ "roleplay",
94
+ "reasoning",
95
+ "math",
96
+ "coding",
97
+ "extraction",
98
+ "stem",
99
+ "humanities",
100
+ "gsm8k",
101
+ "squad2",
102
+ "triviaqa",
103
+ "hellaswag",
104
+ "openbookqa",
105
+ "xwinograd_en",
106
+ "bbh_cot",
107
+ "mmlu",
108
+ "mmlu_social_sciences",
109
+ "mmlu_humanities",
110
+ "mmlu_stem",
111
+ "mmlu_other",
112
+ "humaneval@1",
113
+ "humaneval@10",
114
+ "humaneval_answer@10",
115
+ "jhumaneval-unstripped@1",
116
+ "jhumaneval-unstripped@10",
117
+ "jhumaneval-unstripped_answer@10",
118
+ "humaneval-unstripped@1",
119
+ "humaneval-unstripped@10",
120
+ "humaneval-unstripped_answer@10"
121
+ ]
122
+ }