Spaces:
Running
Running
Yotam-Perlitz
committed on
Commit
•
32f9aec
1
Parent(s):
a3b611d
replace mybench
Browse files
Signed-off-by: Yotam-Perlitz <[email protected]>
- assets/mybench_240901.csv +28 -53
assets/mybench_240901.csv
CHANGED
@@ -1,53 +1,28 @@
|
|
1 |
-
model,
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
qwen1.5_72b_chat,28.89,mybench_average
|
30 |
-
command_r,27.23,mybench_average
|
31 |
-
phi_3_small_128k_instruct,27.19,mybench_average
|
32 |
-
meta_llama_3_8b_instruct,26.67,mybench_average
|
33 |
-
qwen2_7b_instruct,26.45,mybench_average
|
34 |
-
phi_3_small_8k_instruct,26.24,mybench_average
|
35 |
-
openhermes_2.5_mistral_7b,23.3,mybench_average
|
36 |
-
mixtral_8x7b_instruct_v0.1,22.5,mybench_average
|
37 |
-
mistral_7b_instruct_v0.2,19.33,mybench_average
|
38 |
-
phi_3_mini_4k_instruct,19.27,mybench_average
|
39 |
-
zephyr_7b_alpha,19.22,mybench_average
|
40 |
-
phi_3_mini_128k_instruct,18.04,mybench_average
|
41 |
-
zephyr_7b_beta,17.32,mybench_average
|
42 |
-
deepseek_v2_lite_chat,17.14,mybench_average
|
43 |
-
qwen1.5_7b_chat,16.5,mybench_average
|
44 |
-
starling_lm_7b_beta,16.44,mybench_average
|
45 |
-
vicuna_7b_v1.5_16k,13.71,mybench_average
|
46 |
-
vicuna_7b_v1.5,11.73,mybench_average
|
47 |
-
qwen1.5_4b_chat,11.13,mybench_average
|
48 |
-
llama_2_7b_chat,10.25,mybench_average
|
49 |
-
qwen2_1.5b_instruct,9.96,mybench_average
|
50 |
-
yi_6b_chat,8.79,mybench_average
|
51 |
-
qwen2_0.5b_instruct,6.78,mybench_average
|
52 |
-
qwen1.5_1.8b_chat,6.09,mybench_average
|
53 |
-
qwen1.5_0.5b_chat,5.26,mybench_average
|
|
|
1 |
+
model,agentbench
|
2 |
+
gpt-4-0613,4.01
|
3 |
+
claude-2,2.49
|
4 |
+
claude-v1.3,2.44
|
5 |
+
gpt-3.5-turbo-0613,2.32
|
6 |
+
text-davinci-003,1.71
|
7 |
+
claude-instant-v1.1,1.60
|
8 |
+
chat-bison-001,1.39
|
9 |
+
text-davinci-002,1.25
|
10 |
+
llama-2-70b-chat,0.78
|
11 |
+
guanaco-65b,0.54
|
12 |
+
codellama-34b-instruct,0.96
|
13 |
+
vicuna-33b-v1.3,0.73
|
14 |
+
wizardlm-30b-v1.0,0.46
|
15 |
+
guanaco-33b,0.39
|
16 |
+
vicuna-13b-v1.5,0.93
|
17 |
+
llama-2-13b-chat,0.77
|
18 |
+
openchat-13b-v3.2,0.70
|
19 |
+
wizardlm-13b-v1.2,0.66
|
20 |
+
vicuna-7b-v1.5,0.56
|
21 |
+
codellama-13b-instruct,0.56
|
22 |
+
codellama-7b-instruct,0.50
|
23 |
+
koala-13b,0.34
|
24 |
+
llama-2-7b-chat,0.34
|
25 |
+
codegeex2-6b,0.27
|
26 |
+
dolly-12b-v2,0.14
|
27 |
+
chatglm-6b-v1.1,0.11
|
28 |
+
oasst-12b-sft-4,0.03
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|