Spaces:
Running
Running
[ | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7026, | |
"Completeness": 0.7014, | |
"Conciseness": 0.1631, | |
"Helpfulness": 0.6784, | |
"Honesty": 0.6972, | |
"Harmlessness": 0.7026, | |
"3C3H Score": 0.6076 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7151, | |
"Reasoning": 0.64, | |
"Orthographic and Grammatical Analysis": 0.0887, | |
"Safety": 0.4729 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/aya-expanse-32b", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 32.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5612, | |
"Completeness": 0.5612, | |
"Conciseness": 0.1172, | |
"Helpfulness": 0.5468, | |
"Honesty": 0.5519, | |
"Harmlessness": 0.5594, | |
"3C3H Score": 0.4829 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5526, | |
"Reasoning": 0.5561, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.4271 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/aya-expanse-8b", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 8.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4648, | |
"Completeness": 0.46, | |
"Conciseness": 0.1251, | |
"Helpfulness": 0.4415, | |
"Honesty": 0.4495, | |
"Harmlessness": 0.4639, | |
"3C3H Score": 0.4008 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5056, | |
"Reasoning": 0.3817, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2917 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-13B-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 13.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4158, | |
"Completeness": 0.4158, | |
"Conciseness": 0.0941, | |
"Helpfulness": 0.3817, | |
"Honesty": 0.3934, | |
"Harmlessness": 0.4158, | |
"3C3H Score": 0.3527 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4017, | |
"Reasoning": 0.4367, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2104 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-7B-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 7.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5568, | |
"Completeness": 0.546, | |
"Conciseness": 0.2094, | |
"Helpfulness": 0.5302, | |
"Honesty": 0.5391, | |
"Harmlessness": 0.5568, | |
"3C3H Score": 0.4897 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6084, | |
"Reasoning": 0.4717, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.4083 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-v2-8B-Chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 8.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.1547, | |
"Completeness": 0.1439, | |
"Conciseness": 0.0369, | |
"Helpfulness": 0.116, | |
"Honesty": 0.1286, | |
"Harmlessness": 0.1538, | |
"3C3H Score": 0.1223 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.1201, | |
"Reasoning": 0.1094, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3771 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-0.5B-Instruct", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 0.465, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4468, | |
"Completeness": 0.4432, | |
"Conciseness": 0.1278, | |
"Helpfulness": 0.4179, | |
"Honesty": 0.4271, | |
"Harmlessness": 0.4459, | |
"3C3H Score": 0.3848 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.3684, | |
"Reasoning": 0.4983, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.6812 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-3B-Instruct", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 3.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7192, | |
"Completeness": 0.718, | |
"Conciseness": 0.1906, | |
"Helpfulness": 0.6986, | |
"Honesty": 0.7094, | |
"Harmlessness": 0.7192, | |
"3C3H Score": 0.6258 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6677, | |
"Reasoning": 0.7594, | |
"Orthographic and Grammatical Analysis": 0.1075, | |
"Safety": 0.6083 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-72B-Instruct", | |
"License": "qwen", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 72.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6499, | |
"Completeness": 0.6487, | |
"Conciseness": 0.2016, | |
"Helpfulness": 0.6386, | |
"Honesty": 0.638, | |
"Harmlessness": 0.6499, | |
"3C3H Score": 0.5711 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6395, | |
"Reasoning": 0.6122, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.7792 | |
} | |
}, | |
"Meta": { | |
"Model Name": "google/gemma-2-27b-it", | |
"License": "gemma", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 27.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.589, | |
"Completeness": 0.589, | |
"Conciseness": 0.1834, | |
"Helpfulness": 0.5797, | |
"Honesty": 0.5744, | |
"Harmlessness": 0.589, | |
"3C3H Score": 0.5174 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5462, | |
"Reasoning": 0.6011, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.7854 | |
} | |
}, | |
"Meta": { | |
"Model Name": "google/gemma-2-9b-it", | |
"License": "gemma", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 9.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5579, | |
"Completeness": 0.5544, | |
"Conciseness": 0.1682, | |
"Helpfulness": 0.5352, | |
"Honesty": 0.5436, | |
"Harmlessness": 0.5579, | |
"3C3H Score": 0.4862 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5925, | |
"Reasoning": 0.48, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.45 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-adapted-13b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 13.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6679, | |
"Completeness": 0.6655, | |
"Conciseness": 0.1804, | |
"Helpfulness": 0.6326, | |
"Honesty": 0.652, | |
"Harmlessness": 0.6679, | |
"3C3H Score": 0.5777 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6864, | |
"Reasoning": 0.5711, | |
"Orthographic and Grammatical Analysis": 0.0578, | |
"Safety": 0.5771 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-adapted-70b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 70.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5211, | |
"Completeness": 0.5102, | |
"Conciseness": 0.1339, | |
"Helpfulness": 0.4798, | |
"Honesty": 0.5093, | |
"Harmlessness": 0.5202, | |
"3C3H Score": 0.4457 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5144, | |
"Reasoning": 0.4844, | |
"Orthographic and Grammatical Analysis": 0.0269, | |
"Safety": 0.4312 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-13b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 13.0, | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.3729, | |
"Completeness": 0.3669, | |
"Conciseness": 0.0887, | |
"Helpfulness": 0.3441, | |
"Honesty": 0.3543, | |
"Harmlessness": 0.3711, | |
"3C3H Score": 0.3163 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.348, | |
"Reasoning": 0.3761, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3417 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-2p7b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 3.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5806, | |
"Completeness": 0.5759, | |
"Conciseness": 0.1526, | |
"Helpfulness": 0.5475, | |
"Honesty": 0.5621, | |
"Harmlessness": 0.5806, | |
"3C3H Score": 0.4999 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5812, | |
"Reasoning": 0.5239, | |
"Orthographic and Grammatical Analysis": 0.0282, | |
"Safety": 0.5187 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-30b-8k-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 30.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4755, | |
"Completeness": 0.4731, | |
"Conciseness": 0.1243, | |
"Helpfulness": 0.4522, | |
"Honesty": 0.4597, | |
"Harmlessness": 0.4755, | |
"3C3H Score": 0.41 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4743, | |
"Reasoning": 0.4633, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3542 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-6p7b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 7.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6392, | |
"Completeness": 0.6129, | |
"Conciseness": 0.27, | |
"Helpfulness": 0.6016, | |
"Honesty": 0.6171, | |
"Harmlessness": 0.6383, | |
"3C3H Score": 0.5632 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6465, | |
"Reasoning": 0.6283, | |
"Orthographic and Grammatical Analysis": 0.0591, | |
"Safety": 0.4625 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.1-70B-Instruct", | |
"License": "llama3.1", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 70.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4421, | |
"Completeness": 0.4409, | |
"Conciseness": 0.1416, | |
"Helpfulness": 0.3967, | |
"Honesty": 0.4065, | |
"Harmlessness": 0.4421, | |
"3C3H Score": 0.3783 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.3826, | |
"Reasoning": 0.45, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.6625 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.1-8B-Instruct", | |
"License": "llama3.1", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 8.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.2359, | |
"Completeness": 0.2058, | |
"Conciseness": 0.0581, | |
"Helpfulness": 0.1781, | |
"Honesty": 0.2106, | |
"Harmlessness": 0.2341, | |
"3C3H Score": 0.1871 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.198, | |
"Reasoning": 0.2328, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2229 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Meta-Llama-3-8B-Instruct", | |
"License": "llama3", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 14.963, | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5204, | |
"Completeness": 0.1295, | |
"Conciseness": 0.4149, | |
"Helpfulness": 0.2332, | |
"Honesty": 0.4814, | |
"Harmlessness": 0.5204, | |
"3C3H Score": 0.3833 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4053, | |
"Reasoning": 0.3806, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.8188 | |
} | |
}, | |
"Meta": { | |
"Model Name": "silma-ai/SILMA-9B-Instruct-v1.0", | |
"License": "gemma", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 9.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.542, | |
"Completeness": 0.5156, | |
"Conciseness": 0.2512, | |
"Helpfulness": 0.5033, | |
"Honesty": 0.533, | |
"Harmlessness": 0.542, | |
"3C3H Score": 0.4812 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6009, | |
"Reasoning": 0.4825, | |
"Orthographic and Grammatical Analysis": 0.0309, | |
"Safety": 0.2583 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/aya-23-35B", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 35.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5878, | |
"Completeness": 0.5472, | |
"Conciseness": 0.1738, | |
"Helpfulness": 0.5594, | |
"Honesty": 0.5806, | |
"Harmlessness": 0.5833, | |
"3C3H Score": 0.5054 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6209, | |
"Reasoning": 0.5394, | |
"Orthographic and Grammatical Analysis": 0.0269, | |
"Safety": 0.2354 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/c4ai-command-r-08-2024", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 32.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6282, | |
"Completeness": 0.6221, | |
"Conciseness": 0.1733, | |
"Helpfulness": 0.5978, | |
"Honesty": 0.6119, | |
"Harmlessness": 0.6282, | |
"3C3H Score": 0.5436 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6891, | |
"Reasoning": 0.5333, | |
"Orthographic and Grammatical Analysis": 0.0264, | |
"Safety": 0.2521 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/c4ai-command-r-v01", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 35.0, | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5297, | |
"Completeness": 0.4679, | |
"Conciseness": 0.2876, | |
"Helpfulness": 0.4694, | |
"Honesty": 0.5097, | |
"Harmlessness": 0.5297, | |
"3C3H Score": 0.4657 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5958, | |
"Reasoning": 0.4296, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3171 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-v1.5-13B-Chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 13.0, | |
"Total Entries": 279, | |
"Successful Entries": 275, | |
"Failed Entries": 4, | |
"Success Ratio": 0.9857 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6717, | |
"Completeness": 0.6642, | |
"Conciseness": 0.2906, | |
"Helpfulness": 0.6479, | |
"Honesty": 0.6657, | |
"Harmlessness": 0.6717, | |
"3C3H Score": 0.602 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7136, | |
"Reasoning": 0.5694, | |
"Orthographic and Grammatical Analysis": 0.0632, | |
"Safety": 0.75 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-v2-70B-Chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 70.0, | |
"Total Entries": 279, | |
"Successful Entries": 267, | |
"Failed Entries": 12, | |
"Success Ratio": 0.957 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7103, | |
"Completeness": 0.7091, | |
"Conciseness": 0.1912, | |
"Helpfulness": 0.6888, | |
"Honesty": 0.7036, | |
"Harmlessness": 0.7103, | |
"3C3H Score": 0.6189 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6862, | |
"Reasoning": 0.7472, | |
"Orthographic and Grammatical Analysis": 0.0282, | |
"Safety": 0.5482 | |
} | |
}, | |
"Meta": { | |
"Model Name": "MaziyarPanahi/calme-2.2-qwen2.5-72b", | |
"License": "tongyi-qianwen", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 72.0, | |
"Total Entries": 279, | |
"Successful Entries": 275, | |
"Failed Entries": 4, | |
"Success Ratio": 0.9857 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.2848, | |
"Completeness": 0.2848, | |
"Conciseness": 0.088, | |
"Helpfulness": 0.2553, | |
"Honesty": 0.2531, | |
"Harmlessness": 0.2833, | |
"3C3H Score": 0.2416 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.2384, | |
"Reasoning": 0.2723, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.5486 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-1.5B-Instruct", | |
"License": "qwen", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 1.443, | |
"Total Entries": 279, | |
"Successful Entries": 268, | |
"Failed Entries": 11, | |
"Success Ratio": 0.9606 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6146, | |
"Completeness": 0.6059, | |
"Conciseness": 0.1859, | |
"Helpfulness": 0.5914, | |
"Honesty": 0.5988, | |
"Harmlessness": 0.6146, | |
"3C3H Score": 0.5352 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.566, | |
"Reasoning": 0.6684, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.6009 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-14B-Instruct", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 14.0, | |
"Total Entries": 279, | |
"Successful Entries": 269, | |
"Failed Entries": 10, | |
"Success Ratio": 0.9642 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.8831, | |
"Completeness": 0.8781, | |
"Conciseness": 0.3327, | |
"Helpfulness": 0.8697, | |
"Honesty": 0.8778, | |
"Harmlessness": 0.8831, | |
"3C3H Score": 0.7874 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7896, | |
"Reasoning": 0.77, | |
"Orthographic and Grammatical Analysis": 0.7487, | |
"Safety": 0.9013 | |
} | |
}, | |
"Meta": { | |
"Model Name": "claude-3-5-sonnet-20241022", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 268, | |
"Failed Entries": 11, | |
"Success Ratio": 0.9606 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6389, | |
"Completeness": 0.6377, | |
"Conciseness": 0.1938, | |
"Helpfulness": 0.6162, | |
"Honesty": 0.6316, | |
"Harmlessness": 0.6389, | |
"3C3H Score": 0.5595 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6376, | |
"Reasoning": 0.5767, | |
"Orthographic and Grammatical Analysis": 0.0591, | |
"Safety": 0.6854 | |
} | |
}, | |
"Meta": { | |
"Model Name": "claude-3-haiku-20240307", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 276, | |
"Failed Entries": 3, | |
"Success Ratio": 0.9892 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.2603, | |
"Completeness": 0.2311, | |
"Conciseness": 0.0721, | |
"Helpfulness": 0.2132, | |
"Honesty": 0.2476, | |
"Harmlessness": 0.2594, | |
"3C3H Score": 0.214 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.224, | |
"Reasoning": 0.2934, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.1771 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Meta-Llama-3-70B-Instruct", | |
"License": "llama3", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 70.0, | |
"Total Entries": 279, | |
"Successful Entries": 274, | |
"Failed Entries": 5, | |
"Success Ratio": 0.9821 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.721, | |
"Completeness": 0.7138, | |
"Conciseness": 0.2298, | |
"Helpfulness": 0.7041, | |
"Honesty": 0.7141, | |
"Harmlessness": 0.721, | |
"3C3H Score": 0.634 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6923, | |
"Reasoning": 0.7312, | |
"Orthographic and Grammatical Analysis": 0.1909, | |
"Safety": 0.5229 | |
} | |
}, | |
"Meta": { | |
"Model Name": "gpt-4o-mini", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 276, | |
"Failed Entries": 3, | |
"Success Ratio": 0.9892 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.8375, | |
"Completeness": 0.8291, | |
"Conciseness": 0.2894, | |
"Helpfulness": 0.8099, | |
"Honesty": 0.83, | |
"Harmlessness": 0.8375, | |
"3C3H Score": 0.7389 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.8014, | |
"Reasoning": 0.7455, | |
"Orthographic and Grammatical Analysis": 0.5027, | |
"Safety": 0.6063 | |
} | |
}, | |
"Meta": { | |
"Model Name": "gpt-4o", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7194, | |
"Completeness": 0.7181, | |
"Conciseness": 0.1927, | |
"Helpfulness": 0.6921, | |
"Honesty": 0.7099, | |
"Harmlessness": 0.7194, | |
"3C3H Score": 0.6253 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6611, | |
"Reasoning": 0.7922, | |
"Orthographic and Grammatical Analysis": 0.0736, | |
"Safety": 0.5741 | |
} | |
}, | |
"Meta": { | |
"Model Name": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", | |
"License": "qwen", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 72.0, | |
"Total Entries": 279, | |
"Successful Entries": 272, | |
"Failed Entries": 7, | |
"Success Ratio": 0.9749 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7121, | |
"Completeness": 0.7097, | |
"Conciseness": 0.1876, | |
"Helpfulness": 0.6882, | |
"Honesty": 0.6968, | |
"Harmlessness": 0.7121, | |
"3C3H Score": 0.6177 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6815, | |
"Reasoning": 0.7567, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.5667 | |
} | |
}, | |
"Meta": { | |
"Model Name": "MaziyarPanahi/calme-2.1-qwen2.5-72b", | |
"License": "tongyi-qianwen", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 72.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.3285, | |
"Completeness": 0.3225, | |
"Conciseness": 0.0869, | |
"Helpfulness": 0.2987, | |
"Honesty": 0.3081, | |
"Harmlessness": 0.3279, | |
"3C3H Score": 0.2788 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.2945, | |
"Reasoning": 0.3667, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2625 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-1p3b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 1.0, | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5695, | |
"Completeness": 0.5624, | |
"Conciseness": 0.1577, | |
"Helpfulness": 0.5312, | |
"Honesty": 0.554, | |
"Harmlessness": 0.5695, | |
"3C3H Score": 0.4907 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5702, | |
"Reasoning": 0.5139, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.5604 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-30b-16k-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 30.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.1966, | |
"Completeness": 0.1535, | |
"Conciseness": 0.0285, | |
"Helpfulness": 0.1196, | |
"Honesty": 0.1643, | |
"Harmlessness": 0.1957, | |
"3C3H Score": 0.143 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.1577, | |
"Reasoning": 0.1872, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.0875 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-family-590m-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 0.719, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.0791, | |
"Completeness": 0.0504, | |
"Conciseness": 0.0216, | |
"Helpfulness": 0.0414, | |
"Honesty": 0.0549, | |
"Harmlessness": 0.0755, | |
"3C3H Score": 0.0538 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.0293, | |
"Reasoning": 0.0756, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2417 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.2-1B-Instruct", | |
"License": "llama3.2", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 1.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.2736, | |
"Completeness": 0.2616, | |
"Conciseness": 0.0792, | |
"Helpfulness": 0.1971, | |
"Honesty": 0.2315, | |
"Harmlessness": 0.2727, | |
"3C3H Score": 0.2193 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.2133, | |
"Reasoning": 0.28, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3771 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.2-3B-Instruct", | |
"License": "llama3.2", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 3.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6296, | |
"Completeness": 0.6165, | |
"Conciseness": 0.2258, | |
"Helpfulness": 0.5923, | |
"Honesty": 0.6123, | |
"Harmlessness": 0.6296, | |
"3C3H Score": 0.551 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6538, | |
"Reasoning": 0.6033, | |
"Orthographic and Grammatical Analysis": 0.0309, | |
"Safety": 0.375 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.2-90B-Vision-Instruct", | |
"License": "llama3.2", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 90.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6858, | |
"Completeness": 0.6511, | |
"Conciseness": 0.345, | |
"Helpfulness": 0.635, | |
"Honesty": 0.6747, | |
"Harmlessness": 0.6858, | |
"3C3H Score": 0.6129 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7062, | |
"Reasoning": 0.6394, | |
"Orthographic and Grammatical Analysis": 0.0215, | |
"Safety": 0.7167 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.3-70B-Instruct", | |
"License": "llama3.3", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 70.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.3321, | |
"Completeness": 0.1434, | |
"Conciseness": 0.0403, | |
"Helpfulness": 0.1359, | |
"Honesty": 0.2631, | |
"Harmlessness": 0.3295, | |
"3C3H Score": 0.2074 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.2891, | |
"Reasoning": 0.1744, | |
"Orthographic and Grammatical Analysis": 0.0175, | |
"Safety": 0.0 | |
} | |
}, | |
"Meta": { | |
"Model Name": "stabilityai/ar-stablelm-2-chat", | |
"License": "other", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 2.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5317, | |
"Completeness": 0.4875, | |
"Conciseness": 0.1711, | |
"Helpfulness": 0.4271, | |
"Honesty": 0.4904, | |
"Harmlessness": 0.5317, | |
"3C3H Score": 0.4399 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4885, | |
"Reasoning": 0.4211, | |
"Orthographic and Grammatical Analysis": 0.0323, | |
"Safety": 0.7708 | |
} | |
}, | |
"Meta": { | |
"Model Name": "utter-project/EuroLLM-9B-Instruct", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 9.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6619, | |
"Completeness": 0.6356, | |
"Conciseness": 0.1938, | |
"Helpfulness": 0.6353, | |
"Honesty": 0.6526, | |
"Harmlessness": 0.661, | |
"3C3H Score": 0.5734 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7327, | |
"Reasoning": 0.5506, | |
"Orthographic and Grammatical Analysis": 0.0538, | |
"Safety": 0.2458 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/c4ai-command-r-plus-08-2024", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 104.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4791, | |
"Completeness": 0.4433, | |
"Conciseness": 0.2109, | |
"Helpfulness": 0.434, | |
"Honesty": 0.466, | |
"Harmlessness": 0.4773, | |
"3C3H Score": 0.4184 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4969, | |
"Reasoning": 0.4778, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2437 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/aya-23-8B", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 8.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4636, | |
"Completeness": 0.4409, | |
"Conciseness": 0.1532, | |
"Helpfulness": 0.4062, | |
"Honesty": 0.4379, | |
"Harmlessness": 0.4636, | |
"3C3H Score": 0.3942 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4683, | |
"Reasoning": 0.4106, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3771 | |
} | |
}, | |
"Meta": { | |
"Model Name": "inceptionai/jais-adapted-7b-chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float32", | |
"Params": 7.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6822, | |
"Completeness": 0.6643, | |
"Conciseness": 0.2398, | |
"Helpfulness": 0.6461, | |
"Honesty": 0.6723, | |
"Harmlessness": 0.6813, | |
"3C3H Score": 0.5977 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7304, | |
"Reasoning": 0.5472, | |
"Orthographic and Grammatical Analysis": 0.2124, | |
"Safety": 0.3687 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/c4ai-command-r-plus", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 104.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5144, | |
"Completeness": 0.5096, | |
"Conciseness": 0.1304, | |
"Helpfulness": 0.4829, | |
"Honesty": 0.4922, | |
"Harmlessness": 0.5135, | |
"3C3H Score": 0.4405 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4967, | |
"Reasoning": 0.5361, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.3375 | |
} | |
}, | |
"Meta": { | |
"Model Name": "CohereForAI/c4ai-command-r7b-12-2024", | |
"License": "cc-by-nc-4.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 8.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6511, | |
"Completeness": 0.6499, | |
"Conciseness": 0.1948, | |
"Helpfulness": 0.634, | |
"Honesty": 0.6415, | |
"Harmlessness": 0.6505, | |
"3C3H Score": 0.5703 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6214, | |
"Reasoning": 0.6911, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.6125 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-32B-Instruct", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 32.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.546, | |
"Completeness": 0.5448, | |
"Conciseness": 0.1559, | |
"Helpfulness": 0.5233, | |
"Honesty": 0.532, | |
"Harmlessness": 0.5457, | |
"3C3H Score": 0.4746 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.482, | |
"Reasoning": 0.6222, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.6 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/Qwen2.5-7B-Instruct", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 7.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4676, | |
"Completeness": 0.464, | |
"Conciseness": 0.1361, | |
"Helpfulness": 0.4047, | |
"Honesty": 0.4158, | |
"Harmlessness": 0.4658, | |
"3C3H Score": 0.3923 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.427, | |
"Reasoning": 0.4289, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.6 | |
} | |
}, | |
"Meta": { | |
"Model Name": "meta-llama/Llama-3.2-11B-Vision-Instruct", | |
"License": "llama3.2", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 11.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5863, | |
"Completeness": 0.5803, | |
"Conciseness": 0.2338, | |
"Helpfulness": 0.5659, | |
"Honesty": 0.5782, | |
"Harmlessness": 0.5854, | |
"3C3H Score": 0.5217 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5484, | |
"Reasoning": 0.6389, | |
"Orthographic and Grammatical Analysis": 0.0188, | |
"Safety": 0.6583 | |
} | |
}, | |
"Meta": { | |
"Model Name": "FreedomIntelligence/AceGPT-v2-32B-Chat", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 32.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4277, | |
"Completeness": 0.3955, | |
"Conciseness": 0.0687, | |
"Helpfulness": 0.3127, | |
"Honesty": 0.3668, | |
"Harmlessness": 0.4232, | |
"3C3H Score": 0.3324 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.3284, | |
"Reasoning": 0.4578, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.4083 | |
} | |
}, | |
"Meta": { | |
"Model Name": "Qwen/QwQ-32B-Preview", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 32.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6558, | |
"Completeness": 0.6486, | |
"Conciseness": 0.1895, | |
"Helpfulness": 0.6276, | |
"Honesty": 0.6402, | |
"Harmlessness": 0.6552, | |
"3C3H Score": 0.5695 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6239, | |
"Reasoning": 0.7094, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.5167 | |
} | |
}, | |
"Meta": { | |
"Model Name": "maldv/Qwentile2.5-32B-Instruct", | |
"License": "Open", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 32.0, | |
"Total Entries": 279, | |
"Successful Entries": 277, | |
"Failed Entries": 2, | |
"Success Ratio": 0.9928 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.8189, | |
"Completeness": 0.8189, | |
"Conciseness": 0.2113, | |
"Helpfulness": 0.7953, | |
"Honesty": 0.8132, | |
"Harmlessness": 0.8189, | |
"3C3H Score": 0.7128 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7792, | |
"Reasoning": 0.7222, | |
"Orthographic and Grammatical Analysis": 0.5202, | |
"Safety": 0.4708 | |
} | |
}, | |
"Meta": { | |
"Model Name": "deepseek-chat", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7443, | |
"Completeness": 0.7336, | |
"Conciseness": 0.3056, | |
"Helpfulness": 0.7234, | |
"Honesty": 0.733, | |
"Harmlessness": 0.7443, | |
"3C3H Score": 0.664 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7161, | |
"Reasoning": 0.715, | |
"Orthographic and Grammatical Analysis": 0.2352, | |
"Safety": 0.7396 | |
} | |
}, | |
"Meta": { | |
"Model Name": "claude-3-5-haiku-20241022", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5914, | |
"Completeness": 0.589, | |
"Conciseness": 0.1974, | |
"Helpfulness": 0.5648, | |
"Honesty": 0.5792, | |
"Harmlessness": 0.5914, | |
"3C3H Score": 0.5189 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5998, | |
"Reasoning": 0.5878, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.4458 | |
} | |
}, | |
"Meta": { | |
"Model Name": "gpt-3.5-turbo-0125", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7422, | |
"Completeness": 0.7422, | |
"Conciseness": 0.2146, | |
"Helpfulness": 0.7224, | |
"Honesty": 0.7332, | |
"Harmlessness": 0.7422, | |
"3C3H Score": 0.6495 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6476, | |
"Reasoning": 0.805, | |
"Orthographic and Grammatical Analysis": 0.2204, | |
"Safety": 0.7458 | |
} | |
}, | |
"Meta": { | |
"Model Name": "o1-mini-2024-09-12", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.9271, | |
"Completeness": 0.9247, | |
"Conciseness": 0.3465, | |
"Helpfulness": 0.9119, | |
"Honesty": 0.9226, | |
"Harmlessness": 0.9271, | |
"3C3H Score": 0.8267 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.8157, | |
"Reasoning": 0.8478, | |
"Orthographic and Grammatical Analysis": 0.8266, | |
"Safety": 0.8313 | |
} | |
}, | |
"Meta": { | |
"Model Name": "o1-2024-12-17", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.8029, | |
"Completeness": 0.7921, | |
"Conciseness": 0.2733, | |
"Helpfulness": 0.7838, | |
"Honesty": 0.7999, | |
"Harmlessness": 0.8029, | |
"3C3H Score": 0.7091 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7013, | |
"Reasoning": 0.8422, | |
"Orthographic and Grammatical Analysis": 0.379, | |
"Safety": 0.7812 | |
} | |
}, | |
"Meta": { | |
"Model Name": "o3-mini-2025-01-31", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.5484, | |
"Completeness": 0.546, | |
"Conciseness": 0.1532, | |
"Helpfulness": 0.5251, | |
"Honesty": 0.5367, | |
"Harmlessness": 0.5484, | |
"3C3H Score": 0.4763 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4778, | |
"Reasoning": 0.6594, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.5167 | |
} | |
}, | |
"Meta": { | |
"Model Name": "1024m/PHI-4-Hindi-4bit", | |
"License": "Open", | |
"Revision": "main", | |
"Precision": "4bit", | |
"Params": 14.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6141, | |
"Completeness": 0.583, | |
"Conciseness": 0.2327, | |
"Helpfulness": 0.5573, | |
"Honesty": 0.5893, | |
"Harmlessness": 0.6132, | |
"3C3H Score": 0.5316 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.6146, | |
"Reasoning": 0.4711, | |
"Orthographic and Grammatical Analysis": 0.2124, | |
"Safety": 0.6188 | |
} | |
}, | |
"Meta": { | |
"Model Name": "ALLaM-AI/ALLaM-7B-Instruct-preview", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 7.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.6464, | |
"Completeness": 0.5364, | |
"Conciseness": 0.2649, | |
"Helpfulness": 0.5792, | |
"Honesty": 0.629, | |
"Harmlessness": 0.6419, | |
"3C3H Score": 0.5496 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.5943, | |
"Reasoning": 0.6889, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.5375 | |
} | |
}, | |
"Meta": { | |
"Model Name": "malhajar/Shahin-v0.1", | |
"License": "Open", | |
"Revision": "main", | |
"Precision": "float16", | |
"Params": 27.519, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.4588, | |
"Completeness": 0.4468, | |
"Conciseness": 0.126, | |
"Helpfulness": 0.3987, | |
"Honesty": 0.428, | |
"Harmlessness": 0.4567, | |
"3C3H Score": 0.3859 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.4495, | |
"Reasoning": 0.4589, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2229 | |
} | |
}, | |
"Meta": { | |
"Model Name": "mistralai/Ministral-8B-Instruct-2410", | |
"License": "mrl", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 8.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.0983, | |
"Completeness": 0.0899, | |
"Conciseness": 0.0192, | |
"Helpfulness": 0.0647, | |
"Honesty": 0.08, | |
"Harmlessness": 0.0974, | |
"3C3H Score": 0.0749 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.08, | |
"Reasoning": 0.1156, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.0 | |
} | |
}, | |
"Meta": { | |
"Model Name": "mistralai/Mistral-7B-Instruct-v0.2", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 7.0, | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.1971, | |
"Completeness": 0.1505, | |
"Conciseness": 0.0218, | |
"Helpfulness": 0.1045, | |
"Honesty": 0.1517, | |
"Harmlessness": 0.1953, | |
"3C3H Score": 0.1368 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.1523, | |
"Reasoning": 0.1339, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.2417 | |
} | |
}, | |
"Meta": { | |
"Model Name": "mistralai/Mistral-7B-Instruct-v0.3", | |
"License": "apache-2.0", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 7.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7814, | |
"Completeness": 0.773, | |
"Conciseness": 0.2237, | |
"Helpfulness": 0.7455, | |
"Honesty": 0.7733, | |
"Harmlessness": 0.7805, | |
"3C3H Score": 0.6796 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7534, | |
"Reasoning": 0.6583, | |
"Orthographic and Grammatical Analysis": 0.3817, | |
"Safety": 0.6563 | |
} | |
}, | |
"Meta": { | |
"Model Name": "mistral-saba-2502", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.7085, | |
"Completeness": 0.7013, | |
"Conciseness": 0.2148, | |
"Helpfulness": 0.6897, | |
"Honesty": 0.6998, | |
"Harmlessness": 0.7085, | |
"3C3H Score": 0.6204 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.728, | |
"Reasoning": 0.695, | |
"Orthographic and Grammatical Analysis": 0.0847, | |
"Safety": 0.3479 | |
} | |
}, | |
"Meta": { | |
"Model Name": "mistralai/Mistral-Large-Instruct-2411", | |
"License": "mrl", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 123.0, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.3059, | |
"Completeness": 0.2736, | |
"Conciseness": 0.1036, | |
"Helpfulness": 0.2267, | |
"Honesty": 0.2622, | |
"Harmlessness": 0.3059, | |
"3C3H Score": 0.2463 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.2335, | |
"Reasoning": 0.2822, | |
"Orthographic and Grammatical Analysis": 0.0, | |
"Safety": 0.5917 | |
} | |
}, | |
"Meta": { | |
"Model Name": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0", | |
"License": "Gemma", | |
"Revision": "main", | |
"Precision": "bfloat16", | |
"Params": 2.453, | |
"Total Entries": 279, | |
"Successful Entries": 279, | |
"Failed Entries": 0, | |
"Success Ratio": 1.0 | |
} | |
}, | |
{ | |
"claude-3.5-sonnet Scores": { | |
"3C3H Scores": { | |
"Correctness": 0.8789, | |
"Completeness": 0.8777, | |
"Conciseness": 0.292, | |
"Helpfulness": 0.8627, | |
"Honesty": 0.8726, | |
"Harmlessness": 0.8789, | |
"3C3H Score": 0.7771 | |
}, | |
"Tasks Scores": { | |
"Question Answering (QA)": 0.7845, | |
"Reasoning": 0.8083, | |
"Orthographic and Grammatical Analysis": 0.6828, | |
"Safety": 0.75 | |
} | |
}, | |
"Meta": { | |
"Model Name": "claude-3-7-sonnet-20250219", | |
"License": "Proprietary", | |
"Revision": "UNK", | |
"Precision": "UNK", | |
"Params": "UNK", | |
"Total Entries": 279, | |
"Successful Entries": 278, | |
"Failed Entries": 1, | |
"Success Ratio": 0.9964 | |
} | |
}, | |
{ | |
"_last_sync_timestamp": "2025-02-26T09:52:20.282243" | |
} | |
] |