Yotam-Perlitz committed on
Commit 32f9aec
1 Parent(s): a3b611d

replace mybench


Signed-off-by: Yotam-Perlitz <[email protected]>

Files changed (1)
  1. assets/mybench_240901.csv +28 -53
assets/mybench_240901.csv CHANGED
@@ -1,53 +1,28 @@
- model,score,scenario
- claude_3_5_sonnet_20240620,61.16,mybench_average
- gpt_4o_2024_05_13,54.96,mybench_average
- gpt_4_turbo_2024_04_09,53,mybench_average
- gpt_4_1106_preview,52.17,mybench_average
- claude_3_opus_20240229,50.75,mybench_average
- gpt_4_0125_preview,49.39,mybench_average
- deepseek_coder_v2,46.79,mybench_average
- gemini_1.5_pro_api_0514,44.35,mybench_average
- gemma_2_27b_it,41.22,mybench_average
- gemini_1.5_flash_api_0514,40.89,mybench_average
- qwen2_72b_instruct,40.16,mybench_average
- acm_rewrite_qwen2_72b_chat,39.6,mybench_average
- mistral_large_2402,38.92,mybench_average
- deepseek_chat_v2,38.39,mybench_average
- claude_3_sonnet_20240229,38.08,mybench_average
- meta_llama_3_70b_instruct,37.38,mybench_average
- claude_3_haiku_20240307,35.32,mybench_average
- mixtral_8x22b_instruct_v0.1,34.84,mybench_average
- gpt_3.5_turbo_0125,34.43,mybench_average
- gpt_3.5_turbo_1106,34.14,mybench_average
- command_r_plus,32.86,mybench_average
- mistral_small_2402,32.8,mybench_average
- gemma_2_9b_it,31.57,mybench_average
- phi_3_medium_4k_instruct,30.33,mybench_average
- phi_3_medium_128k_instruct,29.64,mybench_average
- deepseek_coder_v2_lite_instruct,29.15,mybench_average
- qwen1.5_110b_chat,28.96,mybench_average
- qwen1.5_72b_chat,28.89,mybench_average
- command_r,27.23,mybench_average
- phi_3_small_128k_instruct,27.19,mybench_average
- meta_llama_3_8b_instruct,26.67,mybench_average
- qwen2_7b_instruct,26.45,mybench_average
- phi_3_small_8k_instruct,26.24,mybench_average
- openhermes_2.5_mistral_7b,23.3,mybench_average
- mixtral_8x7b_instruct_v0.1,22.5,mybench_average
- mistral_7b_instruct_v0.2,19.33,mybench_average
- phi_3_mini_4k_instruct,19.27,mybench_average
- zephyr_7b_alpha,19.22,mybench_average
- phi_3_mini_128k_instruct,18.04,mybench_average
- zephyr_7b_beta,17.32,mybench_average
- deepseek_v2_lite_chat,17.14,mybench_average
- qwen1.5_7b_chat,16.5,mybench_average
- starling_lm_7b_beta,16.44,mybench_average
- vicuna_7b_v1.5_16k,13.71,mybench_average
- vicuna_7b_v1.5,11.73,mybench_average
- qwen1.5_4b_chat,11.13,mybench_average
- llama_2_7b_chat,10.25,mybench_average
- qwen2_1.5b_instruct,9.96,mybench_average
- yi_6b_chat,8.79,mybench_average
- qwen2_0.5b_instruct,6.78,mybench_average
- qwen1.5_1.8b_chat,6.09,mybench_average
- qwen1.5_0.5b_chat,5.26,mybench_average
+ model,agentbench
+ gpt-4-0613,4.01
+ claude-2,2.49
+ claude-v1.3,2.44
+ gpt-3.5-turbo-0613,2.32
+ text-davinci-003,1.71
+ claude-instant-v1.1,1.60
+ chat-bison-001,1.39
+ text-davinci-002,1.25
+ llama-2-70b-chat,0.78
+ guanaco-65b,0.54
+ codellama-34b-instruct,0.96
+ vicuna-33b-v1.3,0.73
+ wizardlm-30b-v1.0,0.46
+ guanaco-33b,0.39
+ vicuna-13b-v1.5,0.93
+ llama-2-13b-chat,0.77
+ openchat-13b-v3.2,0.70
+ wizardlm-13b-v1.2,0.66
+ vicuna-7b-v1.5,0.56
+ codellama-13b-instruct,0.56
+ codellama-7b-instruct,0.50
+ koala-13b,0.34
+ llama-2-7b-chat,0.34
+ codegeex2-6b,0.27
+ dolly-12b-v2,0.14
+ chatglm-6b-v1.1,0.11
+ oasst-12b-sft-4,0.03
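Note that this commit changes the file's schema from the three-column long format (model,score,scenario) to a two-column format (model,agentbench). Below is a minimal sketch of how the replaced file could be read after this change; it assumes pandas is available, and the melt back into a model/score/scenario long form is purely illustrative, not part of this commit.

```python
# Minimal sketch (assumes pandas is installed) for loading the replaced CSV.
import pandas as pd

# New header after this commit: "model,agentbench" (two columns),
# replacing the old "model,score,scenario" long format.
df = pd.read_csv("assets/mybench_240901.csv")

# Illustrative conversion back to the previous long format; the resulting
# "scenario" value will simply be the benchmark column name ("agentbench").
long_df = df.melt(id_vars="model", var_name="scenario", value_name="score")
print(long_df.head())
```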