safetybat / assets /combined_holistic_20240708.csv
Yotam Perlitz
build app
0f8e886
raw
history blame
47.9 kB
model,score,scenario,source,aggragated_from
gpt_4_turbo_2024_04_09,82.6,arena_hard,arena_hard_2404,[]
gpt_4_0125_preview,78.0,arena_hard,arena_hard_2404,[]
gemini_1.5_pro_api_preview,72.0,arena_hard,arena_hard_2404,[]
yi_large,63.7,arena_hard,arena_hard_2404,[]
claude_3_opus_20240229,60.4,arena_hard,arena_hard_2404,[]
glm_4,55.7,arena_hard,arena_hard_2404,[]
gpt_4_0314,50.0,arena_hard,arena_hard_2404,[]
gemini_1.5_flash_api_preview,49.6,arena_hard,arena_hard_2404,[]
claude_3_sonnet_20240229,46.8,arena_hard,arena_hard_2404,[]
claude_3_haiku_20240307,41.5,arena_hard,arena_hard_2404,[]
llama_3_70b_chat,41.1,arena_hard,arena_hard_2404,[]
gpt_4_0613,37.9,arena_hard,arena_hard_2404,[]
mistral_large_2402,37.7,arena_hard,arena_hard_2404,[]
mixtral_8x22b_instruct_v0.1,36.4,arena_hard,arena_hard_2404,[]
qwen1.5_72b_chat,36.1,arena_hard,arena_hard_2404,[]
command_r_plus,33.1,arena_hard,arena_hard_2404,[]
mistral_medium,31.9,arena_hard,arena_hard_2404,[]
mistral_next,27.4,arena_hard,arena_hard_2404,[]
gpt_3.5_turbo_0613,24.8,arena_hard,arena_hard_2404,[]
claude_2.0,24.0,arena_hard,arena_hard_2404,[]
dbrx_instructruct,23.9,arena_hard,arena_hard_2404,[]
mixtral_8x7b_instruct_v0.1,23.4,arena_hard,arena_hard_2404,[]
gpt_3.5_turbo_0125,23.3,arena_hard,arena_hard_2404,[]
yi_34b_chat,23.1,arena_hard,arena_hard_2404,[]
starling_lm_7b_beta,23.0,arena_hard,arena_hard_2404,[]
claude_2.1,22.8,arena_hard,arena_hard_2404,[]
snorkel_mistral_pairrm_dpo,20.7,arena_hard,arena_hard_2404,[]
llama_3_8b_chat,20.6,arena_hard,arena_hard_2404,[]
gpt_3.5_turbo_1106,18.9,arena_hard,arena_hard_2404,[]
gpt_3.5_turbo_0301,18.1,arena_hard,arena_hard_2404,[]
gemini_1.0_pro,17.8,arena_hard,arena_hard_2404,[]
snowflake_arctic_instruct,17.6,arena_hard,arena_hard_2404,[]
command_r,17.0,arena_hard,arena_hard_2404,[]
phi_3_mini_128k_instruct,15.4,arena_hard,arena_hard_2404,[]
tulu_2_dpo_70b,15.0,arena_hard,arena_hard_2404,[]
starling_lm_7b_alpha,12.8,arena_hard,arena_hard_2404,[]
mistral_7b_instruct,12.6,arena_hard,arena_hard_2404,[]
gemma_1.1_7b_it,12.1,arena_hard,arena_hard_2404,[]
llama_2_70b_chat,11.6,arena_hard,arena_hard_2404,[]
vicuna_33b_v1.3,8.6,arena_hard,arena_hard_2404,[]
gemma_7b_it,7.5,arena_hard,arena_hard_2404,[]
llama_2_7b_chat,4.6,arena_hard,arena_hard_2404,[]
gemma_1.1_2b_it,3.4,arena_hard,arena_hard_2404,[]
gemma_2b_it,3.0,arena_hard,arena_hard_2404,[]
gpt_4o_2024_05_13,87.9,mixeval,mixeval_240601,[]
claude_3_opus,88.1,mixeval,mixeval_240601,[]
gpt_4_turbo_2024_04_09,88.8,mixeval,mixeval_240601,[]
gemini_1.5_pro_api_0409,84.2,mixeval,mixeval_240601,[]
yi_large_preview,84.4,mixeval,mixeval_240601,[]
llama_3_70b_instruct,84.0,mixeval,mixeval_240601,[]
qwen_max_0428,86.1,mixeval,mixeval_240601,[]
claude_3_sonnet,81.7,mixeval,mixeval_240601,[]
reka_core_20240415,83.3,mixeval,mixeval_240601,[]
mammoth2_8x7b_plus,81.5,mixeval,mixeval_240601,[]
deepseek_v2,83.7,mixeval,mixeval_240601,[]
command_r_plus,81.5,mixeval,mixeval_240601,[]
yi_1.5_34b_chat,81.7,mixeval,mixeval_240601,[]
mistral_large,84.2,mixeval,mixeval_240601,[]
qwen1.5_72b_chat,84.1,mixeval,mixeval_240601,[]
mistral_medium,81.9,mixeval,mixeval_240601,[]
gemini_1.0_pro,78.9,mixeval,mixeval_240601,[]
reka_flash_20240226,79.8,mixeval,mixeval_240601,[]
mistral_small,81.2,mixeval,mixeval_240601,[]
llama_3_8b_instruct,75.0,mixeval,mixeval_240601,[]
command_r,77.0,mixeval,mixeval_240601,[]
qwen1.5_32b_chat,81.0,mixeval,mixeval_240601,[]
gpt_3.5_turbo_0125,79.7,mixeval,mixeval_240601,[]
claude_3_haiku,79.7,mixeval,mixeval_240601,[]
yi_34b_chat,80.1,mixeval,mixeval_240601,[]
mixtral_8x7b_instruct_v0.1,76.4,mixeval,mixeval_240601,[]
starling_lm_7b_beta,74.8,mixeval,mixeval_240601,[]
yi_1.5_9b_chat,74.2,mixeval,mixeval_240601,[]
gemma_1.1_7b_it,69.6,mixeval,mixeval_240601,[]
vicuna_33b_v1.3,66.3,mixeval,mixeval_240601,[]
llama_2_70b_chat,74.6,mixeval,mixeval_240601,[]
map_neo_instruct_v0.1,70.0,mixeval,mixeval_240601,[]
mistral_7b_instruct_v0.2,70.0,mixeval,mixeval_240601,[]
qwen1.5_7b_chat,71.4,mixeval,mixeval_240601,[]
reka_edge_20240208,68.5,mixeval,mixeval_240601,[]
zephyr_7b_beta,69.1,mixeval,mixeval_240601,[]
llama_2_7b_chat,61.7,mixeval,mixeval_240601,[]
yi_6b_chat,65.6,mixeval,mixeval_240601,[]
qwen1.5_moe_a2.7b_chat,69.1,mixeval,mixeval_240601,[]
gemma_1.1_2b_it,51.9,mixeval,mixeval_240601,[]
vicuna_7b_v1.5,60.3,mixeval,mixeval_240601,[]
olmo_7b_instruct,55.0,mixeval,mixeval_240601,[]
qwen1.5_4b_chat,57.2,mixeval,mixeval_240601,[]
jetmoe_8b_chat,51.6,mixeval,mixeval_240601,[]
mpt_7b_chat,43.8,mixeval,mixeval_240601,[]
llama_3_70b,82.2,mixeval,mixeval_240601,[]
qwen1.5_72b,79.5,mixeval,mixeval_240601,[]
yi_34b,78.3,mixeval,mixeval_240601,[]
qwen1.5_32b,77.6,mixeval,mixeval_240601,[]
mixtral_8x7b,74.0,mixeval,mixeval_240601,[]
llama_2_70b,73.2,mixeval,mixeval_240601,[]
qwen1.5_moe_a2.7b,70.2,mixeval,mixeval_240601,[]
qwen1.5_7b,68.2,mixeval,mixeval_240601,[]
llama_3_8b,65.1,mixeval,mixeval_240601,[]
mistral_7b,64.8,mixeval,mixeval_240601,[]
gemma_7b,64.7,mixeval,mixeval_240601,[]
yi_6b,63.1,mixeval,mixeval_240601,[]
qwen1.5_4b,58.2,mixeval,mixeval_240601,[]
jetmoe_8b,57.1,mixeval,mixeval_240601,[]
deepseek_7b,52.2,mixeval,mixeval_240601,[]
phi_2,51.9,mixeval,mixeval_240601,[]
deepseekmoe_16b,51.4,mixeval,mixeval_240601,[]
llama_2_7b,43.1,mixeval,mixeval_240601,[]
gemma_2b,38.9,mixeval,mixeval_240601,[]
olmo_7b,31.8,mixeval,mixeval_240601,[]
mpt_7b,30.8,mixeval,mixeval_240601,[]
gpt_4_0314,0.57,agieval,BLZ_240312,[]
gpt_4_0613,0.57,agieval,BLZ_240312,[]
claude_1,0.49700000000000005,agieval,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.45299999999999996,agieval,BLZ_240312,[]
yi_34b_chat,0.508,agieval,BLZ_240312,[]
gpt_3.5_turbo_0314,0.43200000000000005,agieval,BLZ_240312,[]
vicuna_33b,0.373,agieval,BLZ_240312,[]
starling_lm_7b_alpha,0.401,agieval,BLZ_240312,[]
llama_2_70b_chat,0.45,agieval,BLZ_240312,[]
openhermes_2.5_mistral_7b,0.43,agieval,BLZ_240312,[]
openchat_3.5,0.42700000000000005,agieval,BLZ_240312,[]
solar_10.7b_instruct_v1.0,0.47600000000000003,agieval,BLZ_240312,[]
dolphin_2.2.1_mistral_7b,0.392,agieval,BLZ_240312,[]
zephyr_7b_beta,0.406,agieval,BLZ_240312,[]
llama_2_13b_chat,0.336,agieval,BLZ_240312,[]
vicuna_13b,0.368,agieval,BLZ_240312,[]
zephyr_7b_alpha,0.38,agieval,BLZ_240312,[]
qwen_14b_chat,0.396,agieval,BLZ_240312,[]
llama_2_7b_chat,0.29600000000000004,agieval,BLZ_240312,[]
mistral_7b_instruct_v0.1,0.335,agieval,BLZ_240312,[]
vicuna_7b,0.314,agieval,BLZ_240312,[]
chatglm3_6b,0.414,agieval,BLZ_240312,[]
chatglm_6b,0.325,agieval,BLZ_240312,[]
llama_13b,0.205,agieval,BLZ_240312,[]
gpt_4_0314,0.963,arc_c,BLZ_240312,[]
mistral_medium,0.899,arc_c,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.7021999999999999,arc_c,BLZ_240312,[]
yi_34b_chat,0.6544,arc_c,BLZ_240312,[]
gpt_3.5_turbo_0314,0.855,arc_c,BLZ_240312,[]
wizardlm_70b_v1.0,0.6544,arc_c,BLZ_240312,[]
tulu_2_dpo_70b,0.721,arc_c,BLZ_240312,[]
vicuna_33b,0.6212,arc_c,BLZ_240312,[]
starling_lm_7b_alpha,0.6382,arc_c,BLZ_240312,[]
llama_2_70b_chat,0.6459,arc_c,BLZ_240312,[]
openhermes_2.5_mistral_7b,0.6493000000000001,arc_c,BLZ_240312,[]
openchat_3.5,0.6391,arc_c,BLZ_240312,[]
solar_10.7b_instruct_v1.0,0.7108,arc_c,BLZ_240312,[]
dolphin_2.2.1_mistral_7b,0.6331,arc_c,BLZ_240312,[]
wizardlm_13b_v1.2,0.5904,arc_c,BLZ_240312,[]
zephyr_7b_beta,0.6203,arc_c,BLZ_240312,[]
mpt_30b_chat,0.5870000000000001,arc_c,BLZ_240312,[]
codellama_34b_instruct,0.5427000000000001,arc_c,BLZ_240312,[]
llama_2_13b_chat,0.5904,arc_c,BLZ_240312,[]
vicuna_13b,0.5708,arc_c,BLZ_240312,[]
zephyr_7b_alpha,0.6101,arc_c,BLZ_240312,[]
falcon_180b_chat,0.6945,arc_c,BLZ_240312,[]
llama_2_7b_chat,0.529,arc_c,BLZ_240312,[]
mistral_7b_instruct_v0.1,0.5452,arc_c,BLZ_240312,[]
vicuna_7b,0.5324,arc_c,BLZ_240312,[]
yi_34bx2_moe_60b,0.7108,arc_c,BLZ_240312,[]
gpt_4_1106_preview,0.977,alpacav1,BLZ_240312,[]
gpt_4_0314,0.9528,alpacav1,BLZ_240312,[]
gpt_4_0613,0.9528,alpacav1,BLZ_240312,[]
mistral_medium,0.9682999999999999,alpacav1,BLZ_240312,[]
claude_1,0.8839,alpacav1,BLZ_240312,[]
claude_2.0,0.9136,alpacav1,BLZ_240312,[]
gemini_pro_dev_api,0.7966,alpacav1,BLZ_240312,[]
claude_2.1,0.8708,alpacav1,BLZ_240312,[]
gpt_3.5_turbo_0613,0.8937,alpacav1,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.9478,alpacav1,BLZ_240312,[]
yi_34b_chat,0.9408,alpacav1,BLZ_240312,[]
gemini_pro,0.7966,alpacav1,BLZ_240312,[]
gpt_3.5_turbo_0314,0.8937,alpacav1,BLZ_240312,[]
tulu_2_dpo_70b,0.9503,alpacav1,BLZ_240312,[]
vicuna_33b,0.8898999999999999,alpacav1,BLZ_240312,[]
starling_lm_7b_alpha,0.9198999999999999,alpacav1,BLZ_240312,[]
llama_2_70b_chat,0.9266,alpacav1,BLZ_240312,[]
openchat_3.5,0.8851,alpacav1,BLZ_240312,[]
gpt_3.5_turbo_1106,0.8626,alpacav1,BLZ_240312,[]
wizardlm_13b_v1.2,0.8917,alpacav1,BLZ_240312,[]
zephyr_7b_beta,0.9059999999999999,alpacav1,BLZ_240312,[]
llama_2_13b_chat,0.8109000000000001,alpacav1,BLZ_240312,[]
zephyr_7b_alpha,0.8576,alpacav1,BLZ_240312,[]
guanaco_33b,0.6596,alpacav1,BLZ_240312,[]
llama_2_7b_chat,0.7137,alpacav1,BLZ_240312,[]
chatglm2_6b,0.47130000000000005,alpacav1,BLZ_240312,[]
openassistant_pythia_12b,0.2596,alpacav1,BLZ_240312,[]
gpt_4_1106_preview,0.5,alpacav2,BLZ_240312,[]
gpt_4_0314,0.221,alpacav2,BLZ_240312,[]
gpt_4_0613,0.158,alpacav2,BLZ_240312,[]
mistral_medium,0.21899999999999997,alpacav2,BLZ_240312,[]
claude_1,0.17,alpacav2,BLZ_240312,[]
claude_2.0,0.172,alpacav2,BLZ_240312,[]
gemini_pro_dev_api,0.16899999999999998,alpacav2,BLZ_240312,[]
claude_2.1,0.157,alpacav2,BLZ_240312,[]
gpt_3.5_turbo_0613,0.141,alpacav2,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.183,alpacav2,BLZ_240312,[]
yi_34b_chat,0.297,alpacav2,BLZ_240312,[]
gemini_pro,0.16899999999999998,alpacav2,BLZ_240312,[]
claude_instant_1,0.161,alpacav2,BLZ_240312,[]
gpt_3.5_turbo_0314,0.096,alpacav2,BLZ_240312,[]
wizardlm_70b_v1.0,0.14400000000000002,alpacav2,BLZ_240312,[]
tulu_2_dpo_70b,0.16,alpacav2,BLZ_240312,[]
vicuna_33b,0.127,alpacav2,BLZ_240312,[]
starling_lm_7b_alpha,0.142,alpacav2,BLZ_240312,[]
deepseek_llm_67b_chat,0.121,alpacav2,BLZ_240312,[]
llama_2_70b_chat,0.139,alpacav2,BLZ_240312,[]
openhermes_2.5_mistral_7b,0.10300000000000001,alpacav2,BLZ_240312,[]
gpt_3.5_turbo_1106,0.092,alpacav2,BLZ_240312,[]
dolphin_2.2.1_mistral_7b,0.09,alpacav2,BLZ_240312,[]
wizardlm_13b_v1.2,0.12,alpacav2,BLZ_240312,[]
zephyr_7b_beta,0.11,alpacav2,BLZ_240312,[]
llama_2_13b_chat,0.077,alpacav2,BLZ_240312,[]
vicuna_13b,0.067,alpacav2,BLZ_240312,[]
zephyr_7b_alpha,0.084,alpacav2,BLZ_240312,[]
qwen_14b_chat,0.075,alpacav2,BLZ_240312,[]
guanaco_33b,0.05,alpacav2,BLZ_240312,[]
llama_2_7b_chat,0.0496,alpacav2,BLZ_240312,[]
vicuna_7b,0.048,alpacav2,BLZ_240312,[]
chatglm2_6b,0.027999999999999997,alpacav2,BLZ_240312,[]
openassistant_pythia_12b,0.018000000000000002,alpacav2,BLZ_240312,[]
gpt_4_1106_preview,0.32799999999999996,alpacaeval2_lc,BLZ_240312,[]
gpt_4_0314,0.21600000000000003,alpacaeval2_lc,BLZ_240312,[]
gpt_4_0613,0.18600000000000003,alpacaeval2_lc,BLZ_240312,[]
mistral_medium,0.196,alpacaeval2_lc,BLZ_240312,[]
claude_1,0.21100000000000002,alpacaeval2_lc,BLZ_240312,[]
claude_2.0,0.21600000000000003,alpacaeval2_lc,BLZ_240312,[]
gemini_pro_dev_api,0.172,alpacaeval2_lc,BLZ_240312,[]
claude_2.1,0.193,alpacaeval2_lc,BLZ_240312,[]
gpt_3.5_turbo_0613,0.14300000000000002,alpacaeval2_lc,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.168,alpacaeval2_lc,BLZ_240312,[]
yi_34b_chat,0.188,alpacaeval2_lc,BLZ_240312,[]
claude_instant_1,0.195,alpacaeval2_lc,BLZ_240312,[]
gpt_3.5_turbo_0314,0.156,alpacaeval2_lc,BLZ_240312,[]
wizardlm_70b_v1.0,0.125,alpacaeval2_lc,BLZ_240312,[]
tulu_2_dpo_70b,0.151,alpacaeval2_lc,BLZ_240312,[]
vicuna_33b,0.115,alpacaeval2_lc,BLZ_240312,[]
starling_lm_7b_alpha,0.10099999999999999,alpacaeval2_lc,BLZ_240312,[]
deepseek_llm_67b_chat,0.141,alpacaeval2_lc,BLZ_240312,[]
llama_2_70b_chat,0.10400000000000001,alpacaeval2_lc,BLZ_240312,[]
openhermes_2.5_mistral_7b,0.126,alpacaeval2_lc,BLZ_240312,[]
gpt_3.5_turbo_1106,0.155,alpacaeval2_lc,BLZ_240312,[]
dolphin_2.2.1_mistral_7b,0.10800000000000001,alpacaeval2_lc,BLZ_240312,[]
wizardlm_13b_v1.2,0.099,alpacaeval2_lc,BLZ_240312,[]
zephyr_7b_beta,0.102,alpacaeval2_lc,BLZ_240312,[]
llama_2_13b_chat,0.068,alpacaeval2_lc,BLZ_240312,[]
vicuna_13b,0.085,alpacaeval2_lc,BLZ_240312,[]
zephyr_7b_alpha,0.086,alpacaeval2_lc,BLZ_240312,[]
qwen_14b_chat,0.1,alpacaeval2_lc,BLZ_240312,[]
llama_2_7b_chat,0.045,alpacaeval2_lc,BLZ_240312,[]
vicuna_7b,0.06,alpacaeval2_lc,BLZ_240312,[]
gpt_4_0125_preview,1.0,arena_elo,BLZ_240312,[]
gpt_4_1106_preview,0.9992019154030327,arena_elo,BLZ_240312,[]
bard_gemini_pro,0.9768555466879489,arena_elo,BLZ_240312,[]
gpt_4_0314,0.9497206703910615,arena_elo,BLZ_240312,[]
gpt_4_0613,0.9273743016759777,arena_elo,BLZ_240312,[]
mistral_medium,0.9177972865123704,arena_elo,BLZ_240312,[]
claude_1,0.9169992019154031,arena_elo,BLZ_240312,[]
claude_2.0,0.9034317637669593,arena_elo,BLZ_240312,[]
gemini_pro_dev_api,0.8938547486033519,arena_elo,BLZ_240312,[]
claude_2.1,0.8930566640063847,arena_elo,BLZ_240312,[]
gpt_3.5_turbo_0613,0.8922585794094174,arena_elo,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.8922585794094174,arena_elo,BLZ_240312,[]
yi_34b_chat,0.8898643256185156,arena_elo,BLZ_240312,[]
gemini_pro,0.8890662410215483,arena_elo,BLZ_240312,[]
claude_instant_1,0.8850758180367119,arena_elo,BLZ_240312,[]
gpt_3.5_turbo_0314,0.8818834796488427,arena_elo,BLZ_240312,[]
wizardlm_70b_v1.0,0.8818834796488427,arena_elo,BLZ_240312,[]
tulu_2_dpo_70b,0.8810853950518756,arena_elo,BLZ_240312,[]
vicuna_33b,0.8723064644852354,arena_elo,BLZ_240312,[]
starling_lm_7b_alpha,0.8699122106943336,arena_elo,BLZ_240312,[]
deepseek_llm_67b_chat,0.8635275339185954,arena_elo,BLZ_240312,[]
llama_2_70b_chat,0.8635275339185954,arena_elo,BLZ_240312,[]
nv_llama2_70b_steerlm_chat,0.8603351955307262,arena_elo,BLZ_240312,[]
openhermes_2.5_mistral_7b,0.8603351955307262,arena_elo,BLZ_240312,[]
openchat_3.5,0.8587390263367917,arena_elo,BLZ_240312,[]
pplx_70b_online,0.8587390263367917,arena_elo,BLZ_240312,[]
gpt_3.5_turbo_1106,0.8547486033519553,arena_elo,BLZ_240312,[]
solar_10.7b_instruct_v1.0,0.8499600957701516,arena_elo,BLZ_240312,[]
dolphin_2.2.1_mistral_7b,0.8499600957701516,arena_elo,BLZ_240312,[]
wizardlm_13b_v1.2,0.8443735035913806,arena_elo,BLZ_240312,[]
zephyr_7b_beta,0.8387869114126097,arena_elo,BLZ_240312,[]
mpt_30b_chat,0.8332003192338387,arena_elo,BLZ_240312,[]
codellama_34b_instruct,0.8324022346368715,arena_elo,BLZ_240312,[]
llama_2_13b_chat,0.8316041500399042,arena_elo,BLZ_240312,[]
vicuna_13b,0.8300079808459697,arena_elo,BLZ_240312,[]
pplx_7b_online,0.8284118116520351,arena_elo,BLZ_240312,[]
zephyr_7b_alpha,0.8276137270550679,arena_elo,BLZ_240312,[]
qwen_14b_chat,0.825219473264166,arena_elo,BLZ_240312,[]
falcon_180b_chat,0.8236233040702314,arena_elo,BLZ_240312,[]
guanaco_33b,0.8236233040702314,arena_elo,BLZ_240312,[]
llama_2_7b_chat,0.8172386272944933,arena_elo,BLZ_240312,[]
stripedhyena_nous_7b,0.8140462889066241,arena_elo,BLZ_240312,[]
mistral_7b_instruct_v0.1,0.8028731045490822,arena_elo,BLZ_240312,[]
palm_chat_bison_001,0.8028731045490822,arena_elo,BLZ_240312,[]
vicuna_7b,0.8020750199521149,arena_elo,BLZ_240312,[]
koala_13b,0.770949720670391,arena_elo,BLZ_240312,[]
chatglm3_6b,0.7661612130885874,arena_elo,BLZ_240312,[]
gpt4all_13b_snoozy,0.74780526735834,arena_elo,BLZ_240312,[]
mpt_7b_chat,0.7430167597765364,arena_elo,BLZ_240312,[]
chatglm2_6b,0.7422186751795691,arena_elo,BLZ_240312,[]
rwkv_4_raven_14b,0.7382282521947326,arena_elo,BLZ_240312,[]
alpaca_13b,0.7214684756584198,arena_elo,BLZ_240312,[]
openassistant_pythia_12b,0.7158818834796489,arena_elo,BLZ_240312,[]
chatglm_6b,0.704708699122107,arena_elo,BLZ_240312,[]
fastchat_t5_3b,0.6975259377494014,arena_elo,BLZ_240312,[]
stablelm_tuned_alpha_7b,0.6743814844373504,arena_elo,BLZ_240312,[]
dolly_v2_12b,0.6568236233040702,arena_elo,BLZ_240312,[]
llama_13b,0.6384676775738228,arena_elo,BLZ_240312,[]
gpt_4_1106_preview,0.8390000000000001,bbh,BLZ_240312,[]
gpt_4_0314,0.867,bbh,BLZ_240312,[]
gpt_4_0613,0.867,bbh,BLZ_240312,[]
claude_1,0.6729999999999999,bbh,BLZ_240312,[]
gemini_pro_dev_api,0.6559999999999999,bbh,BLZ_240312,[]
gpt_3.5_turbo_0613,0.71,bbh,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.67,bbh,BLZ_240312,[]
yi_34b_chat,0.7170000000000001,bbh,BLZ_240312,[]
gemini_pro,0.6559999999999999,bbh,BLZ_240312,[]
tulu_2_dpo_70b,0.66,bbh,BLZ_240312,[]
vicuna_33b,0.52,bbh,BLZ_240312,[]
llama_2_70b_chat,0.608,bbh,BLZ_240312,[]
gpt_3.5_turbo_1106,0.71,bbh,BLZ_240312,[]
dolphin_2.2.1_mistral_7b,0.598,bbh,BLZ_240312,[]
llama_2_13b_chat,0.5820000000000001,bbh,BLZ_240312,[]
vicuna_13b,0.515,bbh,BLZ_240312,[]
qwen_14b_chat,0.537,bbh,BLZ_240312,[]
llama_2_7b_chat,0.35600000000000004,bbh,BLZ_240312,[]
mistral_7b_instruct_v0.1,0.5670000000000001,bbh,BLZ_240312,[]
vicuna_7b,0.434,bbh,BLZ_240312,[]
llama_13b,0.379,bbh,BLZ_240312,[]
gpt_4_1106_preview,0.8604999999999999,eq_benchv2,BLZ_240312,[]
gpt_4_0314,0.8573000000000001,eq_benchv2,BLZ_240312,[]
gpt_4_0613,0.8479000000000001,eq_benchv2,BLZ_240312,[]
mistral_medium,0.8256999999999999,eq_benchv2,BLZ_240312,[]
claude_1,0.7683,eq_benchv2,BLZ_240312,[]
claude_2.0,0.7289,eq_benchv2,BLZ_240312,[]
gemini_pro_dev_api,0.7508,eq_benchv2,BLZ_240312,[]
claude_2.1,0.7395999999999999,eq_benchv2,BLZ_240312,[]
gpt_3.5_turbo_0613,0.6934999999999999,eq_benchv2,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.7237,eq_benchv2,BLZ_240312,[]
yi_34b_chat,0.7162000000000001,eq_benchv2,BLZ_240312,[]
claude_instant_1,0.6904,eq_benchv2,BLZ_240312,[]
gpt_3.5_turbo_0314,0.7067,eq_benchv2,BLZ_240312,[]
wizardlm_70b_v1.0,0.7128,eq_benchv2,BLZ_240312,[]
tulu_2_dpo_70b,0.7663,eq_benchv2,BLZ_240312,[]
vicuna_33b,0.6707,eq_benchv2,BLZ_240312,[]
starling_lm_7b_alpha,0.7390000000000001,eq_benchv2,BLZ_240312,[]
deepseek_llm_67b_chat,0.7753,eq_benchv2,BLZ_240312,[]
llama_2_70b_chat,0.7359,eq_benchv2,BLZ_240312,[]
openhermes_2.5_mistral_7b,0.6689,eq_benchv2,BLZ_240312,[]
openchat_3.5,0.7218000000000001,eq_benchv2,BLZ_240312,[]
pplx_70b_online,0.6279,eq_benchv2,BLZ_240312,[]
gpt_3.5_turbo_1106,0.7173999999999999,eq_benchv2,BLZ_240312,[]
solar_10.7b_instruct_v1.0,0.7353000000000001,eq_benchv2,BLZ_240312,[]
dolphin_2.2.1_mistral_7b,0.6992,eq_benchv2,BLZ_240312,[]
wizardlm_13b_v1.2,0.6371,eq_benchv2,BLZ_240312,[]
zephyr_7b_beta,0.5832999999999999,eq_benchv2,BLZ_240312,[]
codellama_34b_instruct,0.4915,eq_benchv2,BLZ_240312,[]
llama_2_13b_chat,0.49119999999999997,eq_benchv2,BLZ_240312,[]
vicuna_13b,0.6739,eq_benchv2,BLZ_240312,[]
pplx_7b_online,0.4891,eq_benchv2,BLZ_240312,[]
zephyr_7b_alpha,0.5682,eq_benchv2,BLZ_240312,[]
qwen_14b_chat,0.6347,eq_benchv2,BLZ_240312,[]
falcon_180b_chat,0.5682,eq_benchv2,BLZ_240312,[]
guanaco_33b,0.3611,eq_benchv2,BLZ_240312,[]
llama_2_7b_chat,0.3632,eq_benchv2,BLZ_240312,[]
stripedhyena_nous_7b,0.5458,eq_benchv2,BLZ_240312,[]
mistral_7b_instruct_v0.1,0.5215,eq_benchv2,BLZ_240312,[]
yi_34bx2_moe_60b,0.7269,eq_benchv2,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.7641,gpt4all,BLZ_240312,[]
yi_34b_chat,0.7212999999999999,gpt4all,BLZ_240312,[]
starling_lm_7b_alpha,0.7272,gpt4all,BLZ_240312,[]
openhermes_2.5_mistral_7b,0.7312000000000001,gpt4all,BLZ_240312,[]
openchat_3.5,0.7292000000000001,gpt4all,BLZ_240312,[]
solar_10.7b_instruct_v1.0,0.7511,gpt4all,BLZ_240312,[]
dolphin_2.2.1_mistral_7b,0.7223999999999999,gpt4all,BLZ_240312,[]
zephyr_7b_beta,0.7182999999999999,gpt4all,BLZ_240312,[]
vicuna_13b,0.631,gpt4all,BLZ_240312,[]
zephyr_7b_alpha,0.7223999999999999,gpt4all,BLZ_240312,[]
mistral_7b_instruct_v0.1,0.6795,gpt4all,BLZ_240312,[]
vicuna_7b,0.61,gpt4all,BLZ_240312,[]
koala_13b,0.62,gpt4all,BLZ_240312,[]
gpt4all_13b_snoozy,0.653,gpt4all,BLZ_240312,[]
mpt_7b_chat,0.648,gpt4all,BLZ_240312,[]
openassistant_pythia_12b,0.61,gpt4all,BLZ_240312,[]
fastchat_t5_3b,0.537,gpt4all,BLZ_240312,[]
stablelm_tuned_alpha_7b,0.513,gpt4all,BLZ_240312,[]
llama_13b,0.63,gpt4all,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.7262000000000001,hugging_6,BLZ_240312,[]
yi_34b_chat,0.6531999999999999,hugging_6,BLZ_240312,[]
wizardlm_70b_v1.0,0.6125,hugging_6,BLZ_240312,[]
tulu_2_dpo_70b,0.7376999999999999,hugging_6,BLZ_240312,[]
vicuna_33b,0.585,hugging_6,BLZ_240312,[]
starling_lm_7b_alpha,0.6713,hugging_6,BLZ_240312,[]
llama_2_70b_chat,0.624,hugging_6,BLZ_240312,[]
openhermes_2.5_mistral_7b,0.6152000000000001,hugging_6,BLZ_240312,[]
openchat_3.5,0.6124,hugging_6,BLZ_240312,[]
solar_10.7b_instruct_v1.0,0.742,hugging_6,BLZ_240312,[]
dolphin_2.2.1_mistral_7b,0.6493000000000001,hugging_6,BLZ_240312,[]
wizardlm_13b_v1.2,0.5476,hugging_6,BLZ_240312,[]
zephyr_7b_beta,0.6195,hugging_6,BLZ_240312,[]
mpt_30b_chat,0.5538000000000001,hugging_6,BLZ_240312,[]
codellama_34b_instruct,0.5729,hugging_6,BLZ_240312,[]
llama_2_13b_chat,0.5490999999999999,hugging_6,BLZ_240312,[]
vicuna_13b,0.5539999999999999,hugging_6,BLZ_240312,[]
zephyr_7b_alpha,0.595,hugging_6,BLZ_240312,[]
falcon_180b_chat,0.6785,hugging_6,BLZ_240312,[]
llama_2_7b_chat,0.5074000000000001,hugging_6,BLZ_240312,[]
mistral_7b_instruct_v0.1,0.5496,hugging_6,BLZ_240312,[]
vicuna_7b,0.521,hugging_6,BLZ_240312,[]
yi_34bx2_moe_60b,0.7672,hugging_6,BLZ_240312,[]
gpt_4_0314,0.93,llmonitor,BLZ_240312,[]
gpt_4_0613,0.89,llmonitor,BLZ_240312,[]
claude_1,0.66,llmonitor,BLZ_240312,[]
claude_2.0,0.68,llmonitor,BLZ_240312,[]
gpt_3.5_turbo_0613,0.81,llmonitor,BLZ_240312,[]
claude_instant_1,0.6,llmonitor,BLZ_240312,[]
gpt_3.5_turbo_0314,0.79,llmonitor,BLZ_240312,[]
llama_2_70b_chat,0.6,llmonitor,BLZ_240312,[]
mpt_30b_chat,0.4,llmonitor,BLZ_240312,[]
codellama_34b_instruct,0.34,llmonitor,BLZ_240312,[]
llama_2_13b_chat,0.5,llmonitor,BLZ_240312,[]
vicuna_13b,0.5,llmonitor,BLZ_240312,[]
falcon_180b_chat,0.67,llmonitor,BLZ_240312,[]
guanaco_33b,0.43,llmonitor,BLZ_240312,[]
llama_2_7b_chat,0.5,llmonitor,BLZ_240312,[]
mistral_7b_instruct_v0.1,0.57,llmonitor,BLZ_240312,[]
palm_chat_bison_001,0.57,llmonitor,BLZ_240312,[]
vicuna_7b,0.41,llmonitor,BLZ_240312,[]
koala_13b,0.31,llmonitor,BLZ_240312,[]
mpt_7b_chat,0.43,llmonitor,BLZ_240312,[]
dolly_v2_12b,0.23,llmonitor,BLZ_240312,[]
mistral_medium,0.654,magi,BLZ_240312,[]
gemini_pro_dev_api,0.528,magi,BLZ_240312,[]
gpt_3.5_turbo_0613,0.455,magi,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.49560000000000004,magi,BLZ_240312,[]
yi_34b_chat,0.5821999999999999,magi,BLZ_240312,[]
gpt_3.5_turbo_0314,0.512,magi,BLZ_240312,[]
wizardlm_70b_v1.0,0.4476,magi,BLZ_240312,[]
tulu_2_dpo_70b,0.5212,magi,BLZ_240312,[]
vicuna_33b,0.3837,magi,BLZ_240312,[]
starling_lm_7b_alpha,0.4304,magi,BLZ_240312,[]
deepseek_llm_67b_chat,0.5946,magi,BLZ_240312,[]
llama_2_70b_chat,0.39899999999999997,magi,BLZ_240312,[]
openhermes_2.5_mistral_7b,0.4236,magi,BLZ_240312,[]
openchat_3.5,0.42200000000000004,magi,BLZ_240312,[]
gpt_3.5_turbo_1106,0.462,magi,BLZ_240312,[]
solar_10.7b_instruct_v1.0,0.4693,magi,BLZ_240312,[]
dolphin_2.2.1_mistral_7b,0.3782,magi,BLZ_240312,[]
wizardlm_13b_v1.2,0.3678,magi,BLZ_240312,[]
zephyr_7b_beta,0.4042,magi,BLZ_240312,[]
llama_2_13b_chat,0.37170000000000003,magi,BLZ_240312,[]
vicuna_13b,0.36560000000000004,magi,BLZ_240312,[]
zephyr_7b_alpha,0.39899999999999997,magi,BLZ_240312,[]
qwen_14b_chat,0.4535,magi,BLZ_240312,[]
guanaco_33b,0.38659999999999994,magi,BLZ_240312,[]
llama_2_7b_chat,0.35969999999999996,magi,BLZ_240312,[]
mistral_7b_instruct_v0.1,0.3704,magi,BLZ_240312,[]
gpt_4_1106_preview,0.805,mmlu,BLZ_240312,[]
gpt_4_0314,0.8640000000000001,mmlu,BLZ_240312,[]
mistral_medium,0.753,mmlu,BLZ_240312,[]
claude_1,0.77,mmlu,BLZ_240312,[]
claude_2.0,0.785,mmlu,BLZ_240312,[]
gemini_pro_dev_api,0.718,mmlu,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.706,mmlu,BLZ_240312,[]
yi_34b_chat,0.735,mmlu,BLZ_240312,[]
gemini_pro,0.718,mmlu,BLZ_240312,[]
claude_instant_1,0.7340000000000001,mmlu,BLZ_240312,[]
gpt_3.5_turbo_0314,0.7,mmlu,BLZ_240312,[]
wizardlm_70b_v1.0,0.637,mmlu,BLZ_240312,[]
tulu_2_dpo_70b,0.698,mmlu,BLZ_240312,[]
vicuna_33b,0.5920000000000001,mmlu,BLZ_240312,[]
starling_lm_7b_alpha,0.639,mmlu,BLZ_240312,[]
deepseek_llm_67b_chat,0.713,mmlu,BLZ_240312,[]
llama_2_70b_chat,0.63,mmlu,BLZ_240312,[]
nv_llama2_70b_steerlm_chat,0.685,mmlu,BLZ_240312,[]
openhermes_2.5_mistral_7b,0.638,mmlu,BLZ_240312,[]
openchat_3.5,0.643,mmlu,BLZ_240312,[]
gpt_3.5_turbo_1106,0.6779999999999999,mmlu,BLZ_240312,[]
solar_10.7b_instruct_v1.0,0.662,mmlu,BLZ_240312,[]
dolphin_2.2.1_mistral_7b,0.632,mmlu,BLZ_240312,[]
wizardlm_13b_v1.2,0.527,mmlu,BLZ_240312,[]
zephyr_7b_beta,0.614,mmlu,BLZ_240312,[]
mpt_30b_chat,0.504,mmlu,BLZ_240312,[]
codellama_34b_instruct,0.537,mmlu,BLZ_240312,[]
llama_2_13b_chat,0.536,mmlu,BLZ_240312,[]
vicuna_13b,0.5579999999999999,mmlu,BLZ_240312,[]
zephyr_7b_alpha,0.614,mmlu,BLZ_240312,[]
qwen_14b_chat,0.665,mmlu,BLZ_240312,[]
falcon_180b_chat,0.68,mmlu,BLZ_240312,[]
guanaco_33b,0.5760000000000001,mmlu,BLZ_240312,[]
llama_2_7b_chat,0.45799999999999996,mmlu,BLZ_240312,[]
mistral_7b_instruct_v0.1,0.5539999999999999,mmlu,BLZ_240312,[]
vicuna_7b,0.51,mmlu,BLZ_240312,[]
koala_13b,0.447,mmlu,BLZ_240312,[]
gpt4all_13b_snoozy,0.43,mmlu,BLZ_240312,[]
mpt_7b_chat,0.32,mmlu,BLZ_240312,[]
chatglm2_6b,0.455,mmlu,BLZ_240312,[]
rwkv_4_raven_14b,0.256,mmlu,BLZ_240312,[]
alpaca_13b,0.48100000000000004,mmlu,BLZ_240312,[]
openassistant_pythia_12b,0.27,mmlu,BLZ_240312,[]
chatglm_6b,0.361,mmlu,BLZ_240312,[]
fastchat_t5_3b,0.47700000000000004,mmlu,BLZ_240312,[]
stablelm_tuned_alpha_7b,0.244,mmlu,BLZ_240312,[]
dolly_v2_12b,0.257,mmlu,BLZ_240312,[]
llama_13b,0.47,mmlu,BLZ_240312,[]
yi_34bx2_moe_60b,0.775,mmlu,BLZ_240312,[]
gpt_4_0125_preview,0.0929,mt_bench,BLZ_240312,[]
gpt_4_1106_preview,0.0932,mt_bench,BLZ_240312,[]
gpt_4_0314,0.08960000000000001,mt_bench,BLZ_240312,[]
gpt_4_0613,0.09179999999999999,mt_bench,BLZ_240312,[]
mistral_medium,0.0861,mt_bench,BLZ_240312,[]
claude_1,0.079,mt_bench,BLZ_240312,[]
claude_2.0,0.0806,mt_bench,BLZ_240312,[]
gemini_pro_dev_api,0.08039999999999999,mt_bench,BLZ_240312,[]
claude_2.1,0.0818,mt_bench,BLZ_240312,[]
gpt_3.5_turbo_0613,0.0839,mt_bench,BLZ_240312,[]
mixtral_8x7b_instruct_v0.1,0.083,mt_bench,BLZ_240312,[]
yi_34b_chat,0.07769999999999999,mt_bench,BLZ_240312,[]
gemini_pro,0.08039999999999999,mt_bench,BLZ_240312,[]
claude_instant_1,0.0785,mt_bench,BLZ_240312,[]
gpt_3.5_turbo_0314,0.0794,mt_bench,BLZ_240312,[]
wizardlm_70b_v1.0,0.0771,mt_bench,BLZ_240312,[]
tulu_2_dpo_70b,0.0789,mt_bench,BLZ_240312,[]
vicuna_33b,0.0712,mt_bench,BLZ_240312,[]
starling_lm_7b_alpha,0.0809,mt_bench,BLZ_240312,[]
deepseek_llm_67b_chat,0.08529999999999999,mt_bench,BLZ_240312,[]
llama_2_70b_chat,0.06860000000000001,mt_bench,BLZ_240312,[]
nv_llama2_70b_steerlm_chat,0.0754,mt_bench,BLZ_240312,[]
openhermes_2.5_mistral_7b,0.07690000000000001,mt_bench,BLZ_240312,[]
openchat_3.5,0.0781,mt_bench,BLZ_240312,[]
pplx_70b_online,0.0588,mt_bench,BLZ_240312,[]
gpt_3.5_turbo_1106,0.0832,mt_bench,BLZ_240312,[]
solar_10.7b_instruct_v1.0,0.0758,mt_bench,BLZ_240312,[]
wizardlm_13b_v1.2,0.07200000000000001,mt_bench,BLZ_240312,[]
zephyr_7b_beta,0.07339999999999999,mt_bench,BLZ_240312,[]
mpt_30b_chat,0.0639,mt_bench,BLZ_240312,[]
llama_2_13b_chat,0.0665,mt_bench,BLZ_240312,[]
vicuna_13b,0.06570000000000001,mt_bench,BLZ_240312,[]
zephyr_7b_alpha,0.0688,mt_bench,BLZ_240312,[]
qwen_14b_chat,0.0696,mt_bench,BLZ_240312,[]
guanaco_33b,0.0653,mt_bench,BLZ_240312,[]
llama_2_7b_chat,0.06269999999999999,mt_bench,BLZ_240312,[]
mistral_7b_instruct_v0.1,0.0684,mt_bench,BLZ_240312,[]
palm_chat_bison_001,0.064,mt_bench,BLZ_240312,[]
vicuna_7b,0.0617,mt_bench,BLZ_240312,[]
koala_13b,0.0535,mt_bench,BLZ_240312,[]
gpt4all_13b_snoozy,0.0541,mt_bench,BLZ_240312,[]
mpt_7b_chat,0.0542,mt_bench,BLZ_240312,[]
chatglm2_6b,0.0496,mt_bench,BLZ_240312,[]
rwkv_4_raven_14b,0.0398,mt_bench,BLZ_240312,[]
alpaca_13b,0.0453,mt_bench,BLZ_240312,[]
openassistant_pythia_12b,0.0432,mt_bench,BLZ_240312,[]
chatglm_6b,0.045,mt_bench,BLZ_240312,[]
fastchat_t5_3b,0.0304,mt_bench,BLZ_240312,[]
stablelm_tuned_alpha_7b,0.0275,mt_bench,BLZ_240312,[]
dolly_v2_12b,0.032799999999999996,mt_bench,BLZ_240312,[]
llama_13b,0.026099999999999998,mt_bench,BLZ_240312,[]
gpt_4_0613,0.735,mmlu,helm_lite_240610,[]
llama_3_70b,0.695,mmlu,helm_lite_240610,[]
mixtral_8x22b,0.701,mmlu,helm_lite_240610,[]
palmyra_x_v3_72b,0.702,mmlu,helm_lite_240610,[]
gpt_4_turbo_1106_preview,0.699,mmlu,helm_lite_240610,[]
palm_2_unicorn,0.702,mmlu,helm_lite_240610,[]
claude_3_opus_20240229,0.768,mmlu,helm_lite_240610,[]
qwen1.5_72b,0.647,mmlu,helm_lite_240610,[]
palmyra_x_v2_33b,0.621,mmlu,helm_lite_240610,[]
yi_34b,0.65,mmlu,helm_lite_240610,[]
qwen1.5_32b,0.628,mmlu,helm_lite_240610,[]
claude_v1.3,0.631,mmlu,helm_lite_240610,[]
mixtral_8x7b_32k_seqlen,0.649,mmlu,helm_lite_240610,[]
palm_2_bison,0.608,mmlu,helm_lite_240610,[]
claude_2.0,0.639,mmlu,helm_lite_240610,[]
deepseek_llm_chat_67b,0.641,mmlu,helm_lite_240610,[]
llama_2_70b,0.58,mmlu,helm_lite_240610,[]
claude_2.1,0.643,mmlu,helm_lite_240610,[]
gpt_3.5_text_davinci_003,0.555,mmlu,helm_lite_240610,[]
qwen1.5_14b,0.626,mmlu,helm_lite_240610,[]
claude_instant_1.2,0.631,mmlu,helm_lite_240610,[]
llama_3_8b,0.602,mmlu,helm_lite_240610,[]
gpt_3.5_turbo_0613,0.614,mmlu,helm_lite_240610,[]
gemma_7b,0.571,mmlu,helm_lite_240610,[]
claude_3_sonnet_20240229,0.652,mmlu,helm_lite_240610,[]
gpt_3.5_text_davinci_002,0.568,mmlu,helm_lite_240610,[]
llama_65b,0.584,mmlu,helm_lite_240610,[]
mistral_large_2402,0.638,mmlu,helm_lite_240610,[]
cohere_command,0.525,mmlu,helm_lite_240610,[]
dbrx_instructruct,0.643,mmlu,helm_lite_240610,[]
mistral_v0.1_7b,0.584,mmlu,helm_lite_240610,[]
mistral_small_2402,0.593,mmlu,helm_lite_240610,[]
mistral_medium_2312,0.618,mmlu,helm_lite_240610,[]
qwen1.5_7b,0.569,mmlu,helm_lite_240610,[]
claude_3_haiku_20240307,0.662,mmlu,helm_lite_240610,[]
yi_6b,0.53,mmlu,helm_lite_240610,[]
llama_2_13b,0.505,mmlu,helm_lite_240610,[]
jurassic_2_jumbo_178b,0.483,mmlu,helm_lite_240610,[]
falcon_40b,0.507,mmlu,helm_lite_240610,[]
phi_2,0.518,mmlu,helm_lite_240610,[]
jurassic_2_grande_17b,0.471,mmlu,helm_lite_240610,[]
llama_2_7b,0.425,mmlu,helm_lite_240610,[]
luminous_supreme_70b,0.316,mmlu,helm_lite_240610,[]
cohere_command_light,0.386,mmlu,helm_lite_240610,[]
luminous_extended_30b,0.248,mmlu,helm_lite_240610,[]
falcon_7b,0.288,mmlu,helm_lite_240610,[]
olmo_7b,0.305,mmlu,helm_lite_240610,[]
luminous_base_13b,0.243,mmlu,helm_lite_240610,[]
llama_2_70b,0.582,mmlu,helm_classic_240130,[]
llama_65b,0.584,mmlu,helm_classic_240130,[]
text_davinci_002,0.568,mmlu,helm_classic_240130,[]
mistral_v0.1_7b,0.572,mmlu,helm_classic_240130,[]
cohere_command_beta_52.4b,0.452,mmlu,helm_classic_240130,[]
text_davinci_003,0.569,mmlu,helm_classic_240130,[]
jurassic_2_jumbo_178b,0.48,mmlu,helm_classic_240130,[]
llama_2_13b,0.507,mmlu,helm_classic_240130,[]
tnlg_v2_530b,0.469,mmlu,helm_classic_240130,[]
gpt_3.5_turbo_0613,0.391,mmlu,helm_classic_240130,[]
llama_30b,0.531,mmlu,helm_classic_240130,[]
anthropic_lm_v4_s3_52b,0.481,mmlu,helm_classic_240130,[]
gpt_3.5_turbo_0301,0.59,mmlu,helm_classic_240130,[]
jurassic_2_grande_17b,0.475,mmlu,helm_classic_240130,[]
palmyra_x_43b,0.609,mmlu,helm_classic_240130,[]
falcon_40b,0.509,mmlu,helm_classic_240130,[]
falcon_instruct_40b,0.497,mmlu,helm_classic_240130,[]
mpt_instruct_30b,0.444,mmlu,helm_classic_240130,[]
mpt_30b,0.437,mmlu,helm_classic_240130,[]
j1_grande_v2_beta_17b,0.445,mmlu,helm_classic_240130,[]
vicuna_v1.3_13b,0.462,mmlu,helm_classic_240130,[]
cohere_command_beta_6.1b,0.406,mmlu,helm_classic_240130,[]
cohere_xlarge_v20221108_52.4b,0.382,mmlu,helm_classic_240130,[]
luminous_supreme_70b,0.38,mmlu,helm_classic_240130,[]
vicuna_v1.3_7b,0.434,mmlu,helm_classic_240130,[]
opt_175b,0.318,mmlu,helm_classic_240130,[]
llama_2_7b,0.431,mmlu,helm_classic_240130,[]
llama_13b,0.422,mmlu,helm_classic_240130,[]
instructpalmyra_30b,0.403,mmlu,helm_classic_240130,[]
cohere_xlarge_v20220609_52.4b,0.353,mmlu,helm_classic_240130,[]
jurassic_2_large_7.5b,0.339,mmlu,helm_classic_240130,[]
davinci_175b,0.422,mmlu,helm_classic_240130,[]
llama_7b,0.321,mmlu,helm_classic_240130,[]
redpajama_incite_instruct_7b,0.363,mmlu,helm_classic_240130,[]
j1_jumbo_v1_178b,0.259,mmlu,helm_classic_240130,[]
glm_130b,0.344,mmlu,helm_classic_240130,[]
luminous_extended_30b,0.321,mmlu,helm_classic_240130,[]
opt_66b,0.276,mmlu,helm_classic_240130,[]
bloom_176b,0.299,mmlu,helm_classic_240130,[]
j1_grande_v1_17b,0.27,mmlu,helm_classic_240130,[]
alpaca_7b,0.385,mmlu,helm_classic_240130,[]
falcon_7b,0.286,mmlu,helm_classic_240130,[]
redpajama_incite_base_7b,0.302,mmlu,helm_classic_240130,[]
cohere_large_v20220720_13.1b,0.324,mmlu,helm_classic_240130,[]
redpajama_incite_instruct_v1_3b,0.257,mmlu,helm_classic_240130,[]
text_curie_001,0.237,mmlu,helm_classic_240130,[]
gpt_neox_20b,0.276,mmlu,helm_classic_240130,[]
luminous_base_13b,0.27,mmlu,helm_classic_240130,[]
cohere_medium_v20221108_6.1b,0.254,mmlu,helm_classic_240130,[]
redpajama_incite_base_v1_3b,0.263,mmlu,helm_classic_240130,[]
tnlg_v2_6.7b,0.242,mmlu,helm_classic_240130,[]
j1_large_v1_7.5b,0.241,mmlu,helm_classic_240130,[]
gpt_j_6b,0.249,mmlu,helm_classic_240130,[]
pythia_12b,0.274,mmlu,helm_classic_240130,[]
curie_6.7b,0.243,mmlu,helm_classic_240130,[]
falcon_instruct_7b,0.275,mmlu,helm_classic_240130,[]
cohere_medium_v20220720_6.1b,0.279,mmlu,helm_classic_240130,[]
text_babbage_001,0.229,mmlu,helm_classic_240130,[]
t0pp_11b,0.407,mmlu,helm_classic_240130,[]
pythia_6.9b,0.236,mmlu,helm_classic_240130,[]
ul2_20b,0.291,mmlu,helm_classic_240130,[]
t5_11b,0.29,mmlu,helm_classic_240130,[]
babbage_1.3b,0.235,mmlu,helm_classic_240130,[]
cohere_small_v20220720_410m,0.264,mmlu,helm_classic_240130,[]
ada_350m,0.243,mmlu,helm_classic_240130,[]
text_ada_001,0.238,mmlu,helm_classic_240130,[]
yalm_100b,0.243,mmlu,helm_classic_240130,[]
aya_101,0.029411764705882353,biggen_mwr,biggen_240612,[]
c4ai_command_r_plus_gptq,0.8382352941176471,biggen_mwr,biggen_240612,[]
c4ai_command_r_v01,0.6948529411764706,biggen_mwr,biggen_240612,[]
claude_3_haiku_20240307,0.9252450980392157,biggen_mwr,biggen_240612,[]
claude_3_opus_20240229,0.9681372549019608,biggen_mwr,biggen_240612,[]
claude_3_sonnet_20240229,0.9240196078431373,biggen_mwr,biggen_240612,[]
codellama_13b,0.07598039215686275,biggen_mwr,biggen_240612,[]
codellama_13b_instruct,0.4276960784313726,biggen_mwr,biggen_240612,[]
codellama_34b,0.1482843137254902,biggen_mwr,biggen_240612,[]
codellama_34b_instruct,0.5098039215686274,biggen_mwr,biggen_240612,[]
codellama_70b,0.18872549019607843,biggen_mwr,biggen_240612,[]
codellama_70b_instruct,0.27450980392156865,biggen_mwr,biggen_240612,[]
codellama_7b,0.05514705882352941,biggen_mwr,biggen_240612,[]
codellama_7b_instruct,0.36519607843137253,biggen_mwr,biggen_240612,[]
codetulu_2_13b,0.43137254901960786,biggen_mwr,biggen_240612,[]
codetulu_2_34b,0.5441176470588235,biggen_mwr,biggen_240612,[]
codetulu_2_7b,0.32598039215686275,biggen_mwr,biggen_240612,[]
gemini_1.0_pro,0.7107843137254902,biggen_mwr,biggen_240612,[]
gemini_flash_1.5,0.866421568627451,biggen_mwr,biggen_240612,[]
gemini_pro_1.5,0.8676470588235294,biggen_mwr,biggen_240612,[]
gemma_1.1_2b_it,0.33578431372549017,biggen_mwr,biggen_240612,[]
gemma_1.1_7b_it,0.5551470588235294,biggen_mwr,biggen_240612,[]
gemma_2b,0.09803921568627451,biggen_mwr,biggen_240612,[]
gemma_2b_it,0.3333333333333333,biggen_mwr,biggen_240612,[]
gemma_7b,0.013480392156862746,biggen_mwr,biggen_240612,[]
gemma_7b_it,0.40931372549019607,biggen_mwr,biggen_240612,[]
gpt_3.5_turbo_0125,0.7757352941176471,biggen_mwr,biggen_240612,[]
gpt_3.5_turbo_1106,0.758578431372549,biggen_mwr,biggen_240612,[]
gpt_4_0125_preview,0.9779411764705882,biggen_mwr,biggen_240612,[]
gpt_4_1106_preview,0.9889705882352942,biggen_mwr,biggen_240612,[]
gpt_4_turbo_2024_04_09,0.9558823529411765,biggen_mwr,biggen_240612,[]
gpt_4o_2024_05_13,0.9436274509803921,biggen_mwr,biggen_240612,[]
llama_2_13b,0.20220588235294118,biggen_mwr,biggen_240612,[]
llama_2_13b_chat,0.5968137254901961,biggen_mwr,biggen_240612,[]
llama_2_70b,0.4656862745098039,biggen_mwr,biggen_240612,[]
llama_2_70b_chat,0.7205882352941176,biggen_mwr,biggen_240612,[]
llama_2_7b,0.1446078431372549,biggen_mwr,biggen_240612,[]
llama_2_7b_chat,0.5355392156862745,biggen_mwr,biggen_240612,[]
llemma_34b,0.21200980392156862,biggen_mwr,biggen_240612,[]
llemma_7b,0.11029411764705882,biggen_mwr,biggen_240612,[]
meta_llama_3_70b,0.36887254901960786,biggen_mwr,biggen_240612,[]
meta_llama_3_70b_instruct,0.875,biggen_mwr,biggen_240612,[]
meta_llama_3_8b,0.2377450980392157,biggen_mwr,biggen_240612,[]
meta_llama_3_8b_instruct,0.7328431372549019,biggen_mwr,biggen_240612,[]
mistral_7b_instruct_v0.2,0.7156862745098039,biggen_mwr,biggen_240612,[]
mistral_7b_v0.1,0.3272058823529412,biggen_mwr,biggen_240612,[]
mistral_7b_v0.2,0.3137254901960784,biggen_mwr,biggen_240612,[]
mistral_large_hjpark,0.8762254901960784,biggen_mwr,biggen_240612,[]
mistral_medium_hjpark,0.8970588235294118,biggen_mwr,biggen_240612,[]
mistral_orpo_alpha,0.5392156862745098,biggen_mwr,biggen_240612,[]
mistral_orpo_beta,0.5477941176470589,biggen_mwr,biggen_240612,[]
mixtral_8x22b_instruct_v0.1_awq,0.8198529411764706,biggen_mwr,biggen_240612,[]
mixtral_8x22b_v0.1_awq,0.5968137254901961,biggen_mwr,biggen_240612,[]
mixtral_8x7b_instruct_v0.1,0.7647058823529411,biggen_mwr,biggen_240612,[]
mixtral_8x7b_v0.1,0.5453431372549019,biggen_mwr,biggen_240612,[]
nous_hermes_2_mistral_7b_dpo,0.571078431372549,biggen_mwr,biggen_240612,[]
nous_hermes_2_mixtral_8x7b_dpo,0.7095588235294118,biggen_mwr,biggen_240612,[]
nous_hermes_2_mixtral_8x7b_sft,0.6262254901960784,biggen_mwr,biggen_240612,[]
nous_hermes_2_yi_34b,0.5906862745098039,biggen_mwr,biggen_240612,[]
olmo_1b,0.028186274509803922,biggen_mwr,biggen_240612,[]
olmo_7b,0.07107843137254902,biggen_mwr,biggen_240612,[]
olmo_7b_instruct,0.30269607843137253,biggen_mwr,biggen_240612,[]
olmo_7b_sft,0.2549019607843137,biggen_mwr,biggen_240612,[]
openchat_3.5_0106,0.6825980392156863,biggen_mwr,biggen_240612,[]
openhermes_2.5_mistral_7b,0.4583333333333333,biggen_mwr,biggen_240612,[]
openhermes_2_mistral_7b,0.5122549019607843,biggen_mwr,biggen_240612,[]
orca_2_13b,0.17401960784313725,biggen_mwr,biggen_240612,[]
orca_2_7b,0.08700980392156862,biggen_mwr,biggen_240612,[]
phi_1,0.0,biggen_mwr,biggen_240612,[]
phi_1_5,0.15318627450980393,biggen_mwr,biggen_240612,[]
phi_2,0.29044117647058826,biggen_mwr,biggen_240612,[]
phi_3_mini_128k_instruct,0.6911764705882353,biggen_mwr,biggen_240612,[]
phi_3_mini_4k_instruct,0.7867647058823529,biggen_mwr,biggen_240612,[]
qwen1.5_0.5b,0.0428921568627451,biggen_mwr,biggen_240612,[]
qwen1.5_0.5b_chat,0.07965686274509803,biggen_mwr,biggen_240612,[]
qwen1.5_1.8b,0.12867647058823528,biggen_mwr,biggen_240612,[]
qwen1.5_1.8b_chat,0.21691176470588236,biggen_mwr,biggen_240612,[]
qwen1.5_14b,0.3946078431372549,biggen_mwr,biggen_240612,[]
qwen1.5_14b_chat,0.7267156862745098,biggen_mwr,biggen_240612,[]
qwen1.5_32b,0.4791666666666667,biggen_mwr,biggen_240612,[]
qwen1.5_32b_chat,0.8149509803921569,biggen_mwr,biggen_240612,[]
qwen1.5_4b,0.21323529411764705,biggen_mwr,biggen_240612,[]
qwen1.5_4b_chat,0.29411764705882354,biggen_mwr,biggen_240612,[]
qwen1.5_72b,0.5294117647058824,biggen_mwr,biggen_240612,[]
qwen1.5_72b_chat,0.8713235294117647,biggen_mwr,biggen_240612,[]
qwen1.5_7b,0.2610294117647059,biggen_mwr,biggen_240612,[]
qwen1.5_7b_chat,0.6580882352941176,biggen_mwr,biggen_240612,[]
qwen_110b_chat,0.8848039215686274,biggen_mwr,biggen_240612,[]
solar_10.7b_instruct_v1.0,0.6862745098039216,biggen_mwr,biggen_240612,[]
solar_10.7b_v1.0,0.43995098039215685,biggen_mwr,biggen_240612,[]
starling_lm_7b_alpha,0.6139705882352942,biggen_mwr,biggen_240612,[]
starling_lm_7b_beta,0.7573529411764706,biggen_mwr,biggen_240612,[]
tulu_2_13b,0.4313725490196078,biggen_mwr,biggen_240612,[]
tulu_2_7b,0.3553921568627451,biggen_mwr,biggen_240612,[]
tulu_2_dpo_13b,0.5833333333333333,biggen_mwr,biggen_240612,[]
tulu_2_dpo_70b,0.7708333333333334,biggen_mwr,biggen_240612,[]
tulu_2_dpo_7b,0.4767156862745098,biggen_mwr,biggen_240612,[]
yi_34b,0.46078431372549017,biggen_mwr,biggen_240612,[]
yi_34b_chat,0.7720588235294118,biggen_mwr,biggen_240612,[]
yi_6b,0.17892156862745098,biggen_mwr,biggen_240612,[]
yi_6b_chat,0.4117647058823529,biggen_mwr,biggen_240612,[]
zephyr_7b_beta,0.6200980392156863,biggen_mwr,biggen_240612,[]
zephyr_orpo_141b_a35b_v0.1_awq,0.6311274509803921,biggen_mwr,biggen_240612,[]
gpt_4o_0513,1293.0,arena_elo,wildbench_240612,[]
gpt_4_turbo_0409,1251.0,arena_elo,wildbench_240612,[]
gpt_4_turbo_0125,1239.0,arena_elo,wildbench_240612,[]
llama_3_70b_inst,1213.0,arena_elo,wildbench_240612,[]
claude_3_opus,1232.0,arena_elo,wildbench_240612,[]
claude_3_sonnet,1187.0,arena_elo,wildbench_240612,[]
qwen1.5_72b_chat,1143.0,arena_elo,wildbench_240612,[]
command_r_plus,1155.0,arena_elo,wildbench_240612,[]
claude_3_haiku,1169.0,arena_elo,wildbench_240612,[]
mistral_large,1158.0,arena_elo,wildbench_240612,[]
starlinglm_7b_beta,1111.0,arena_elo,wildbench_240612,[]
llama_3_8b_inst,1144.0,arena_elo,wildbench_240612,[]
command_r,1106.0,arena_elo,wildbench_240612,[]
mixtral_8x7b_inst,1114.0,arena_elo,wildbench_240612,[]
dbrx_instruct,1106.0,arena_elo,wildbench_240612,[]
mistral_7b_inst_v0.2,1071.0,arena_elo,wildbench_240612,[]
tulu_2_dpo_70b,1099.0,arena_elo,wildbench_240612,[]
llama_2_70b_chat,1070.0,arena_elo,wildbench_240612,[]
qwen1.5_7b_chat,1059.0,arena_elo,wildbench_240612,[]
gpt_3.5_turbo_0125,1105.0,arena_elo,wildbench_240612,[]
llama_2_7b_chat,1012.0,arena_elo,wildbench_240612,[]
gemma_7b_it,1047.0,arena_elo,wildbench_240612,[]
gemma_2b_it,980.0,arena_elo,wildbench_240612,[]
gpt_4_turbo_0409,82.6,arena_hard,wildbench_240612,[]
gpt_4_turbo_0125,78.0,arena_hard,wildbench_240612,[]
llama_3_70b_inst,41.1,arena_hard,wildbench_240612,[]
claude_3_opus,60.4,arena_hard,wildbench_240612,[]
llama3_inst_8b_simpo,33.8,arena_hard,wildbench_240612,[]
claude_3_sonnet,46.8,arena_hard,wildbench_240612,[]
qwen1.5_72b_chat,36.1,arena_hard,wildbench_240612,[]
command_r_plus,33.1,arena_hard,wildbench_240612,[]
claude_3_haiku,41.5,arena_hard,wildbench_240612,[]
mistral_large,37.7,arena_hard,wildbench_240612,[]
starlinglm_7b_beta,23.0,arena_hard,wildbench_240612,[]
llama_3_8b_inst,20.6,arena_hard,wildbench_240612,[]
command_r,17.0,arena_hard,wildbench_240612,[]
mixtral_8x7b_inst,23.4,arena_hard,wildbench_240612,[]
dbrx_instruct,23.9,arena_hard,wildbench_240612,[]
tulu_2_dpo_70b,15.0,arena_hard,wildbench_240612,[]
llama_2_70b_chat,11.6,arena_hard,wildbench_240612,[]
gpt_3.5_turbo_0125,23.3,arena_hard,wildbench_240612,[]
llama_2_7b_chat,4.6,arena_hard,wildbench_240612,[]
gemma_7b_it,7.5,arena_hard,wildbench_240612,[]
gemma_2b_it,3.0,arena_hard,wildbench_240612,[]
gpt_4o_0513,57.5,alpacaeval2_lc,wildbench_240612,[]
gpt_4_turbo_0409,55.0,alpacaeval2_lc,wildbench_240612,[]
llama_3_70b_inst,34.4,alpacaeval2_lc,wildbench_240612,[]
claude_3_opus,40.5,alpacaeval2_lc,wildbench_240612,[]
llama3_inst_8b_simpo,44.7,alpacaeval2_lc,wildbench_240612,[]
claude_3_sonnet,34.9,alpacaeval2_lc,wildbench_240612,[]
qwen1.5_72b_chat,36.6,alpacaeval2_lc,wildbench_240612,[]
mistral_large,32.7,alpacaeval2_lc,wildbench_240612,[]
llama_3_8b_inst,22.9,alpacaeval2_lc,wildbench_240612,[]
mixtral_8x7b_inst,23.7,alpacaeval2_lc,wildbench_240612,[]
dbrx_instruct,25.4,alpacaeval2_lc,wildbench_240612,[]
mistral_7b_inst_v0.2,17.1,alpacaeval2_lc,wildbench_240612,[]
tulu_2_dpo_70b,21.2,alpacaeval2_lc,wildbench_240612,[]
llama_2_70b_chat,14.7,alpacaeval2_lc,wildbench_240612,[]
qwen1.5_7b_chat,14.7,alpacaeval2_lc,wildbench_240612,[]
llama_2_7b_chat,5.4,alpacaeval2_lc,wildbench_240612,[]
gemma_7b_it,10.4,alpacaeval2_lc,wildbench_240612,[]
gemma_2b_it,5.4,alpacaeval2_lc,wildbench_240612,[]
gpt_4o_0513,51.3,alpacav2,wildbench_240612,[]
gpt_4_turbo_0409,46.1,alpacav2,wildbench_240612,[]
llama_3_70b_inst,33.2,alpacav2,wildbench_240612,[]
claude_3_opus,29.1,alpacav2,wildbench_240612,[]
llama3_inst_8b_simpo,40.5,alpacav2,wildbench_240612,[]
claude_3_sonnet,25.6,alpacav2,wildbench_240612,[]
qwen1.5_72b_chat,26.5,alpacav2,wildbench_240612,[]
mistral_large,21.4,alpacav2,wildbench_240612,[]
llama_3_8b_inst,22.6,alpacav2,wildbench_240612,[]
mixtral_8x7b_inst,18.3,alpacav2,wildbench_240612,[]
dbrx_instruct,18.4,alpacav2,wildbench_240612,[]
mistral_7b_inst_v0.2,14.7,alpacav2,wildbench_240612,[]
tulu_2_dpo_70b,16.0,alpacav2,wildbench_240612,[]
llama_2_70b_chat,13.9,alpacav2,wildbench_240612,[]
qwen1.5_7b_chat,11.8,alpacav2,wildbench_240612,[]
llama_2_7b_chat,5.0,alpacav2,wildbench_240612,[]
gemma_7b_it,6.9,alpacav2,wildbench_240612,[]
gemma_2b_it,3.4,alpacav2,wildbench_240612,[]
pythia_1b,31.4,arc_c,olmes_260624,[]
olmo_1b,38.6,arc_c,olmes_260624,[]
tinyllama_1.1b,38.1,arc_c,olmes_260624,[]
pythia_6.7b,44.6,arc_c,olmes_260624,[]
rpj_incite_7b,45.3,arc_c,olmes_260624,[]
stablelm2_1.6b,50.6,arc_c,olmes_260624,[]
olmo_7b,46.4,arc_c,olmes_260624,[]
mpt_7b,45.7,arc_c,olmes_260624,[]
falcon_7b,49.7,arc_c,olmes_260624,[]
llama2_7b,54.2,arc_c,olmes_260624,[]
llama2_13b,67.3,arc_c,olmes_260624,[]
olmo_1.7_7b,66.9,arc_c,olmes_260624,[]
llama3_8b,79.3,arc_c,olmes_260624,[]
mistral_7b_v0.1,78.6,arc_c,olmes_260624,[]
llama3_70b,93.7,arc_c,olmes_260624,[]
pythia_1b,31.1,mmlu,olmes_260624,[]
olmo_1b,33.4,mmlu,olmes_260624,[]
tinyllama_1.1b,33.6,mmlu,olmes_260624,[]
pythia_6.7b,37.7,mmlu,olmes_260624,[]
rpj_incite_7b,40.1,mmlu,olmes_260624,[]
stablelm2_1.6b,40.4,mmlu,olmes_260624,[]
olmo_7b,40.5,mmlu,olmes_260624,[]
mpt_7b,40.6,mmlu,olmes_260624,[]
falcon_7b,42.1,mmlu,olmes_260624,[]
llama2_7b,46.2,mmlu,olmes_260624,[]
llama2_13b,55.8,mmlu,olmes_260624,[]
olmo_1.7_7b,54.4,mmlu,olmes_260624,[]
llama3_8b,66.6,mmlu,olmes_260624,[]
mistral_7b_v0.1,64.0,mmlu,olmes_260624,[]
llama3_70b,79.8,mmlu,olmes_260624,[]
pythia_1b,49.0,olmes_average,olmes_260624,[]
olmo_1b,55.1,olmes_average,olmes_260624,[]
tinyllama_1.1b,55.4,olmes_average,olmes_260624,[]
pythia_6.7b,59.1,olmes_average,olmes_260624,[]
rpj_incite_7b,62.8,olmes_average,olmes_260624,[]
stablelm2_1.6b,65.1,olmes_average,olmes_260624,[]
olmo_7b,65.3,olmes_average,olmes_260624,[]
mpt_7b,65.6,olmes_average,olmes_260624,[]
falcon_7b,66.9,olmes_average,olmes_260624,[]
llama2_7b,69.0,olmes_average,olmes_260624,[]
llama2_13b,74.0,olmes_average,olmes_260624,[]
olmo_1.7_7b,75.5,olmes_average,olmes_260624,[]
llama3_8b,78.7,olmes_average,olmes_260624,[]
mistral_7b_v0.1,79.1,olmes_average,olmes_260624,[]
llama3_70b,88.4,olmes_average,olmes_260624,[]
llama_2_70b,0.3753,mmlu_pro,mmlu_pro_240610,[]
llama_3_8b,0.3536,mmlu_pro,mmlu_pro_240610,[]
deepseekmath_instruct,0.353,mmlu_pro,mmlu_pro_240610,[]
gemma_7b,0.3373,mmlu_pro,mmlu_pro_240610,[]
mistral_7b_v0.1,0.3088,mmlu_pro,mmlu_pro_240610,[]
mistral_7b_instruct_v0.2,0.3084,mmlu_pro,mmlu_pro_240610,[]
mistral_7b_v0.2,0.3043,mmlu_pro,mmlu_pro_240610,[]
qwen1.5_7b_chat,0.2906,mmlu_pro,mmlu_pro_240610,[]
yi_6b_chat,0.2884,mmlu_pro,mmlu_pro_240610,[]
yi_6b,0.2651,mmlu_pro,mmlu_pro_240610,[]
mistral_7b_instruct_v0.1,0.2575,mmlu_pro,mmlu_pro_240610,[]
llama_2_13b,0.2534,mmlu_pro,mmlu_pro_240610,[]
llemma_7b,0.2345,mmlu_pro,mmlu_pro_240610,[]
llama_2_7b,0.2032,mmlu_pro,mmlu_pro_240610,[]
gpt_4o,0.7255,mmlu_pro,mmlu_pro_240610,[]
claude_3_opus,0.6845,mmlu_pro,mmlu_pro_240610,[]
gpt_4_turbo,0.6371,mmlu_pro,mmlu_pro_240610,[]
gemini_1.5_flash,0.5912,mmlu_pro,mmlu_pro_240610,[]
yi_large,0.5753,mmlu_pro,mmlu_pro_240610,[]
claude_3_sonnet,0.568,mmlu_pro,mmlu_pro_240610,[]
llama_3_70b_instruct,0.562,mmlu_pro,mmlu_pro_240610,[]
deepseek_v2,0.5481,mmlu_pro,mmlu_pro_240610,[]
phi_3_medium_4k_instruct,0.5348,mmlu_pro,mmlu_pro_240610,[]
llama_3_70b,0.5278,mmlu_pro,mmlu_pro_240610,[]
qwen1.5_72b_chat,0.5162,mmlu_pro,mmlu_pro_240610,[]
mammoth2_8x7b_plus,0.504,mmlu_pro,mmlu_pro_240610,[]
qwen1.5_110b,0.4993,mmlu_pro,mmlu_pro_240610,[]
mammoth2_8b_plus,0.4335,mmlu_pro,mmlu_pro_240610,[]
mixtral_8x7b_instruct_v0.1,0.4327,mmlu_pro,mmlu_pro_240610,[]
phi_3_mini_4k_instruct,0.4317,mmlu_pro,mmlu_pro_240610,[]
yi_34b,0.4303,mmlu_pro,mmlu_pro_240610,[]
mixtral_8x7b_v0.1,0.4103,mmlu_pro,mmlu_pro_240610,[]
llama_3_8b_instruct,0.4098,mmlu_pro,mmlu_pro_240610,[]
mammoth2_7b_plus,0.4085,mmlu_pro,mmlu_pro_240610,[]
qwen1.5_14b_chat,0.3802,mmlu_pro,mmlu_pro_240610,[]
c4ai_command_r_v01,0.379,mmlu_pro,mmlu_pro_240610,[]