alielfilali01's picture
Update assets/results/results.json
bf1f6f2 verified
[
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.7026,
"Completeness": 0.7014,
"Conciseness": 0.1631,
"Helpfulness": 0.6784,
"Honesty": 0.6972,
"Harmlessness": 0.7026,
"3C3H Score": 0.6076
},
"Tasks Scores": {
"Question Answering (QA)": 0.7151,
"Reasoning": 0.64,
"Orthographic and Grammatical Analysis": 0.0887,
"Safety": 0.4729
}
},
"Meta": {
"Model Name": "CohereForAI/aya-expanse-32b",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 32.0,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5612,
"Completeness": 0.5612,
"Conciseness": 0.1172,
"Helpfulness": 0.5468,
"Honesty": 0.5519,
"Harmlessness": 0.5594,
"3C3H Score": 0.4829
},
"Tasks Scores": {
"Question Answering (QA)": 0.5526,
"Reasoning": 0.5561,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.4271
}
},
"Meta": {
"Model Name": "CohereForAI/aya-expanse-8b",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 8.0,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4648,
"Completeness": 0.46,
"Conciseness": 0.1251,
"Helpfulness": 0.4415,
"Honesty": 0.4495,
"Harmlessness": 0.4639,
"3C3H Score": 0.4008
},
"Tasks Scores": {
"Question Answering (QA)": 0.5056,
"Reasoning": 0.3817,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.2917
}
},
"Meta": {
"Model Name": "FreedomIntelligence/AceGPT-13B-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float16",
"Params": 13.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4158,
"Completeness": 0.4158,
"Conciseness": 0.0941,
"Helpfulness": 0.3817,
"Honesty": 0.3934,
"Harmlessness": 0.4158,
"3C3H Score": 0.3527
},
"Tasks Scores": {
"Question Answering (QA)": 0.4017,
"Reasoning": 0.4367,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.2104
}
},
"Meta": {
"Model Name": "FreedomIntelligence/AceGPT-7B-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float16",
"Params": 7.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5568,
"Completeness": 0.546,
"Conciseness": 0.2094,
"Helpfulness": 0.5302,
"Honesty": 0.5391,
"Harmlessness": 0.5568,
"3C3H Score": 0.4897
},
"Tasks Scores": {
"Question Answering (QA)": 0.6084,
"Reasoning": 0.4717,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.4083
}
},
"Meta": {
"Model Name": "FreedomIntelligence/AceGPT-v2-8B-Chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float16",
"Params": 8.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.1547,
"Completeness": 0.1439,
"Conciseness": 0.0369,
"Helpfulness": 0.116,
"Honesty": 0.1286,
"Harmlessness": 0.1538,
"3C3H Score": 0.1223
},
"Tasks Scores": {
"Question Answering (QA)": 0.1201,
"Reasoning": 0.1094,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.3771
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-0.5B-Instruct",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 0.465,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4468,
"Completeness": 0.4432,
"Conciseness": 0.1278,
"Helpfulness": 0.4179,
"Honesty": 0.4271,
"Harmlessness": 0.4459,
"3C3H Score": 0.3848
},
"Tasks Scores": {
"Question Answering (QA)": 0.3684,
"Reasoning": 0.4983,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.6812
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-3B-Instruct",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 3.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.7192,
"Completeness": 0.718,
"Conciseness": 0.1906,
"Helpfulness": 0.6986,
"Honesty": 0.7094,
"Harmlessness": 0.7192,
"3C3H Score": 0.6258
},
"Tasks Scores": {
"Question Answering (QA)": 0.6677,
"Reasoning": 0.7594,
"Orthographic and Grammatical Analysis": 0.1075,
"Safety": 0.6083
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-72B-Instruct",
"License": "qwen",
"Revision": "main",
"Precision": "bfloat16",
"Params": 72.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6499,
"Completeness": 0.6487,
"Conciseness": 0.2016,
"Helpfulness": 0.6386,
"Honesty": 0.638,
"Harmlessness": 0.6499,
"3C3H Score": 0.5711
},
"Tasks Scores": {
"Question Answering (QA)": 0.6395,
"Reasoning": 0.6122,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7792
}
},
"Meta": {
"Model Name": "google/gemma-2-27b-it",
"License": "gemma",
"Revision": "main",
"Precision": "bfloat16",
"Params": 27.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.589,
"Completeness": 0.589,
"Conciseness": 0.1834,
"Helpfulness": 0.5797,
"Honesty": 0.5744,
"Harmlessness": 0.589,
"3C3H Score": 0.5174
},
"Tasks Scores": {
"Question Answering (QA)": 0.5462,
"Reasoning": 0.6011,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7854
}
},
"Meta": {
"Model Name": "google/gemma-2-9b-it",
"License": "gemma",
"Revision": "main",
"Precision": "bfloat16",
"Params": 9.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5579,
"Completeness": 0.5544,
"Conciseness": 0.1682,
"Helpfulness": 0.5352,
"Honesty": 0.5436,
"Harmlessness": 0.5579,
"3C3H Score": 0.4862
},
"Tasks Scores": {
"Question Answering (QA)": 0.5925,
"Reasoning": 0.48,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.45
}
},
"Meta": {
"Model Name": "inceptionai/jais-adapted-13b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 13.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6679,
"Completeness": 0.6655,
"Conciseness": 0.1804,
"Helpfulness": 0.6326,
"Honesty": 0.652,
"Harmlessness": 0.6679,
"3C3H Score": 0.5777
},
"Tasks Scores": {
"Question Answering (QA)": 0.6864,
"Reasoning": 0.5711,
"Orthographic and Grammatical Analysis": 0.0578,
"Safety": 0.5771
}
},
"Meta": {
"Model Name": "inceptionai/jais-adapted-70b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 70.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5211,
"Completeness": 0.5102,
"Conciseness": 0.1339,
"Helpfulness": 0.4798,
"Honesty": 0.5093,
"Harmlessness": 0.5202,
"3C3H Score": 0.4457
},
"Tasks Scores": {
"Question Answering (QA)": 0.5144,
"Reasoning": 0.4844,
"Orthographic and Grammatical Analysis": 0.0269,
"Safety": 0.4312
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-13b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 13.0,
"Total Entries": 279,
"Successful Entries": 277,
"Failed Entries": 2,
"Success Ratio": 0.9928
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3729,
"Completeness": 0.3669,
"Conciseness": 0.0887,
"Helpfulness": 0.3441,
"Honesty": 0.3543,
"Harmlessness": 0.3711,
"3C3H Score": 0.3163
},
"Tasks Scores": {
"Question Answering (QA)": 0.348,
"Reasoning": 0.3761,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.3417
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-2p7b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 3.0,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5806,
"Completeness": 0.5759,
"Conciseness": 0.1526,
"Helpfulness": 0.5475,
"Honesty": 0.5621,
"Harmlessness": 0.5806,
"3C3H Score": 0.4999
},
"Tasks Scores": {
"Question Answering (QA)": 0.5812,
"Reasoning": 0.5239,
"Orthographic and Grammatical Analysis": 0.0282,
"Safety": 0.5187
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-30b-8k-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 30.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4755,
"Completeness": 0.4731,
"Conciseness": 0.1243,
"Helpfulness": 0.4522,
"Honesty": 0.4597,
"Harmlessness": 0.4755,
"3C3H Score": 0.41
},
"Tasks Scores": {
"Question Answering (QA)": 0.4743,
"Reasoning": 0.4633,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.3542
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-6p7b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 7.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6392,
"Completeness": 0.6129,
"Conciseness": 0.27,
"Helpfulness": 0.6016,
"Honesty": 0.6171,
"Harmlessness": 0.6383,
"3C3H Score": 0.5632
},
"Tasks Scores": {
"Question Answering (QA)": 0.6465,
"Reasoning": 0.6283,
"Orthographic and Grammatical Analysis": 0.0591,
"Safety": 0.4625
}
},
"Meta": {
"Model Name": "meta-llama/Llama-3.1-70B-Instruct",
"License": "llama3.1",
"Revision": "main",
"Precision": "bfloat16",
"Params": 70.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4421,
"Completeness": 0.4409,
"Conciseness": 0.1416,
"Helpfulness": 0.3967,
"Honesty": 0.4065,
"Harmlessness": 0.4421,
"3C3H Score": 0.3783
},
"Tasks Scores": {
"Question Answering (QA)": 0.3826,
"Reasoning": 0.45,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.6625
}
},
"Meta": {
"Model Name": "meta-llama/Llama-3.1-8B-Instruct",
"License": "llama3.1",
"Revision": "main",
"Precision": "bfloat16",
"Params": 8.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2359,
"Completeness": 0.2058,
"Conciseness": 0.0581,
"Helpfulness": 0.1781,
"Honesty": 0.2106,
"Harmlessness": 0.2341,
"3C3H Score": 0.1871
},
"Tasks Scores": {
"Question Answering (QA)": 0.198,
"Reasoning": 0.2328,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.2229
}
},
"Meta": {
"Model Name": "meta-llama/Meta-Llama-3-8B-Instruct",
"License": "llama3",
"Revision": "main",
"Precision": "bfloat16",
"Params": 14.963,
"Total Entries": 279,
"Successful Entries": 277,
"Failed Entries": 2,
"Success Ratio": 0.9928
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5204,
"Completeness": 0.1295,
"Conciseness": 0.4149,
"Helpfulness": 0.2332,
"Honesty": 0.4814,
"Harmlessness": 0.5204,
"3C3H Score": 0.3833
},
"Tasks Scores": {
"Question Answering (QA)": 0.4053,
"Reasoning": 0.3806,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.8188
}
},
"Meta": {
"Model Name": "silma-ai/SILMA-9B-Instruct-v1.0",
"License": "gemma",
"Revision": "main",
"Precision": "bfloat16",
"Params": 9.0,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.542,
"Completeness": 0.5156,
"Conciseness": 0.2512,
"Helpfulness": 0.5033,
"Honesty": 0.533,
"Harmlessness": 0.542,
"3C3H Score": 0.4812
},
"Tasks Scores": {
"Question Answering (QA)": 0.6009,
"Reasoning": 0.4825,
"Orthographic and Grammatical Analysis": 0.0309,
"Safety": 0.2583
}
},
"Meta": {
"Model Name": "CohereForAI/aya-23-35B",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 35.0,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5878,
"Completeness": 0.5472,
"Conciseness": 0.1738,
"Helpfulness": 0.5594,
"Honesty": 0.5806,
"Harmlessness": 0.5833,
"3C3H Score": 0.5054
},
"Tasks Scores": {
"Question Answering (QA)": 0.6209,
"Reasoning": 0.5394,
"Orthographic and Grammatical Analysis": 0.0269,
"Safety": 0.2354
}
},
"Meta": {
"Model Name": "CohereForAI/c4ai-command-r-08-2024",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 32.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6282,
"Completeness": 0.6221,
"Conciseness": 0.1733,
"Helpfulness": 0.5978,
"Honesty": 0.6119,
"Harmlessness": 0.6282,
"3C3H Score": 0.5436
},
"Tasks Scores": {
"Question Answering (QA)": 0.6891,
"Reasoning": 0.5333,
"Orthographic and Grammatical Analysis": 0.0264,
"Safety": 0.2521
}
},
"Meta": {
"Model Name": "CohereForAI/c4ai-command-r-v01",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 35.0,
"Total Entries": 279,
"Successful Entries": 277,
"Failed Entries": 2,
"Success Ratio": 0.9928
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5297,
"Completeness": 0.4679,
"Conciseness": 0.2876,
"Helpfulness": 0.4694,
"Honesty": 0.5097,
"Harmlessness": 0.5297,
"3C3H Score": 0.4657
},
"Tasks Scores": {
"Question Answering (QA)": 0.5958,
"Reasoning": 0.4296,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.3171
}
},
"Meta": {
"Model Name": "FreedomIntelligence/AceGPT-v1.5-13B-Chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 13.0,
"Total Entries": 279,
"Successful Entries": 275,
"Failed Entries": 4,
"Success Ratio": 0.9857
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6717,
"Completeness": 0.6642,
"Conciseness": 0.2906,
"Helpfulness": 0.6479,
"Honesty": 0.6657,
"Harmlessness": 0.6717,
"3C3H Score": 0.602
},
"Tasks Scores": {
"Question Answering (QA)": 0.7136,
"Reasoning": 0.5694,
"Orthographic and Grammatical Analysis": 0.0632,
"Safety": 0.75
}
},
"Meta": {
"Model Name": "FreedomIntelligence/AceGPT-v2-70B-Chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float16",
"Params": 70.0,
"Total Entries": 279,
"Successful Entries": 267,
"Failed Entries": 12,
"Success Ratio": 0.957
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.7103,
"Completeness": 0.7091,
"Conciseness": 0.1912,
"Helpfulness": 0.6888,
"Honesty": 0.7036,
"Harmlessness": 0.7103,
"3C3H Score": 0.6189
},
"Tasks Scores": {
"Question Answering (QA)": 0.6862,
"Reasoning": 0.7472,
"Orthographic and Grammatical Analysis": 0.0282,
"Safety": 0.5482
}
},
"Meta": {
"Model Name": "MaziyarPanahi/calme-2.2-qwen2.5-72b",
"License": "tongyi-qianwen",
"Revision": "main",
"Precision": "bfloat16",
"Params": 72.0,
"Total Entries": 279,
"Successful Entries": 275,
"Failed Entries": 4,
"Success Ratio": 0.9857
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2848,
"Completeness": 0.2848,
"Conciseness": 0.088,
"Helpfulness": 0.2553,
"Honesty": 0.2531,
"Harmlessness": 0.2833,
"3C3H Score": 0.2416
},
"Tasks Scores": {
"Question Answering (QA)": 0.2384,
"Reasoning": 0.2723,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.5486
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-1.5B-Instruct",
"License": "qwen",
"Revision": "main",
"Precision": "bfloat16",
"Params": 1.443,
"Total Entries": 279,
"Successful Entries": 268,
"Failed Entries": 11,
"Success Ratio": 0.9606
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6146,
"Completeness": 0.6059,
"Conciseness": 0.1859,
"Helpfulness": 0.5914,
"Honesty": 0.5988,
"Harmlessness": 0.6146,
"3C3H Score": 0.5352
},
"Tasks Scores": {
"Question Answering (QA)": 0.566,
"Reasoning": 0.6684,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.6009
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-14B-Instruct",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 14.0,
"Total Entries": 279,
"Successful Entries": 269,
"Failed Entries": 10,
"Success Ratio": 0.9642
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.8831,
"Completeness": 0.8781,
"Conciseness": 0.3327,
"Helpfulness": 0.8697,
"Honesty": 0.8778,
"Harmlessness": 0.8831,
"3C3H Score": 0.7874
},
"Tasks Scores": {
"Question Answering (QA)": 0.7896,
"Reasoning": 0.77,
"Orthographic and Grammatical Analysis": 0.7487,
"Safety": 0.9013
}
},
"Meta": {
"Model Name": "claude-3-5-sonnet-20241022",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 279,
"Successful Entries": 268,
"Failed Entries": 11,
"Success Ratio": 0.9606
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6389,
"Completeness": 0.6377,
"Conciseness": 0.1938,
"Helpfulness": 0.6162,
"Honesty": 0.6316,
"Harmlessness": 0.6389,
"3C3H Score": 0.5595
},
"Tasks Scores": {
"Question Answering (QA)": 0.6376,
"Reasoning": 0.5767,
"Orthographic and Grammatical Analysis": 0.0591,
"Safety": 0.6854
}
},
"Meta": {
"Model Name": "claude-3-haiku-20240307",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 279,
"Successful Entries": 276,
"Failed Entries": 3,
"Success Ratio": 0.9892
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2603,
"Completeness": 0.2311,
"Conciseness": 0.0721,
"Helpfulness": 0.2132,
"Honesty": 0.2476,
"Harmlessness": 0.2594,
"3C3H Score": 0.214
},
"Tasks Scores": {
"Question Answering (QA)": 0.224,
"Reasoning": 0.2934,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.1771
}
},
"Meta": {
"Model Name": "meta-llama/Meta-Llama-3-70B-Instruct",
"License": "llama3",
"Revision": "main",
"Precision": "bfloat16",
"Params": 70.0,
"Total Entries": 279,
"Successful Entries": 274,
"Failed Entries": 5,
"Success Ratio": 0.9821
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.721,
"Completeness": 0.7138,
"Conciseness": 0.2298,
"Helpfulness": 0.7041,
"Honesty": 0.7141,
"Harmlessness": 0.721,
"3C3H Score": 0.634
},
"Tasks Scores": {
"Question Answering (QA)": 0.6923,
"Reasoning": 0.7312,
"Orthographic and Grammatical Analysis": 0.1909,
"Safety": 0.5229
}
},
"Meta": {
"Model Name": "gpt-4o-mini",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 279,
"Successful Entries": 276,
"Failed Entries": 3,
"Success Ratio": 0.9892
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.8375,
"Completeness": 0.8291,
"Conciseness": 0.2894,
"Helpfulness": 0.8099,
"Honesty": 0.83,
"Harmlessness": 0.8375,
"3C3H Score": 0.7389
},
"Tasks Scores": {
"Question Answering (QA)": 0.8014,
"Reasoning": 0.7455,
"Orthographic and Grammatical Analysis": 0.5027,
"Safety": 0.6063
}
},
"Meta": {
"Model Name": "gpt-4o",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 279,
"Successful Entries": 277,
"Failed Entries": 2,
"Success Ratio": 0.9928
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.7194,
"Completeness": 0.7181,
"Conciseness": 0.1927,
"Helpfulness": 0.6921,
"Honesty": 0.7099,
"Harmlessness": 0.7194,
"3C3H Score": 0.6253
},
"Tasks Scores": {
"Question Answering (QA)": 0.6611,
"Reasoning": 0.7922,
"Orthographic and Grammatical Analysis": 0.0736,
"Safety": 0.5741
}
},
"Meta": {
"Model Name": "rombodawg/Rombos-LLM-V2.5-Qwen-72b",
"License": "qwen",
"Revision": "main",
"Precision": "bfloat16",
"Params": 72.0,
"Total Entries": 279,
"Successful Entries": 272,
"Failed Entries": 7,
"Success Ratio": 0.9749
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.7121,
"Completeness": 0.7097,
"Conciseness": 0.1876,
"Helpfulness": 0.6882,
"Honesty": 0.6968,
"Harmlessness": 0.7121,
"3C3H Score": 0.6177
},
"Tasks Scores": {
"Question Answering (QA)": 0.6815,
"Reasoning": 0.7567,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.5667
}
},
"Meta": {
"Model Name": "MaziyarPanahi/calme-2.1-qwen2.5-72b",
"License": "tongyi-qianwen",
"Revision": "main",
"Precision": "bfloat16",
"Params": 72.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3285,
"Completeness": 0.3225,
"Conciseness": 0.0869,
"Helpfulness": 0.2987,
"Honesty": 0.3081,
"Harmlessness": 0.3279,
"3C3H Score": 0.2788
},
"Tasks Scores": {
"Question Answering (QA)": 0.2945,
"Reasoning": 0.3667,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.2625
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-1p3b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 1.0,
"Total Entries": 279,
"Successful Entries": 277,
"Failed Entries": 2,
"Success Ratio": 0.9928
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5695,
"Completeness": 0.5624,
"Conciseness": 0.1577,
"Helpfulness": 0.5312,
"Honesty": 0.554,
"Harmlessness": 0.5695,
"3C3H Score": 0.4907
},
"Tasks Scores": {
"Question Answering (QA)": 0.5702,
"Reasoning": 0.5139,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.5604
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-30b-16k-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 30.0,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.1966,
"Completeness": 0.1535,
"Conciseness": 0.0285,
"Helpfulness": 0.1196,
"Honesty": 0.1643,
"Harmlessness": 0.1957,
"3C3H Score": 0.143
},
"Tasks Scores": {
"Question Answering (QA)": 0.1577,
"Reasoning": 0.1872,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.0875
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-590m-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 0.719,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.0791,
"Completeness": 0.0504,
"Conciseness": 0.0216,
"Helpfulness": 0.0414,
"Honesty": 0.0549,
"Harmlessness": 0.0755,
"3C3H Score": 0.0538
},
"Tasks Scores": {
"Question Answering (QA)": 0.0293,
"Reasoning": 0.0756,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.2417
}
},
"Meta": {
"Model Name": "meta-llama/Llama-3.2-1B-Instruct",
"License": "llama3.2",
"Revision": "main",
"Precision": "bfloat16",
"Params": 1.0,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2736,
"Completeness": 0.2616,
"Conciseness": 0.0792,
"Helpfulness": 0.1971,
"Honesty": 0.2315,
"Harmlessness": 0.2727,
"3C3H Score": 0.2193
},
"Tasks Scores": {
"Question Answering (QA)": 0.2133,
"Reasoning": 0.28,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.3771
}
},
"Meta": {
"Model Name": "meta-llama/Llama-3.2-3B-Instruct",
"License": "llama3.2",
"Revision": "main",
"Precision": "bfloat16",
"Params": 3.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6296,
"Completeness": 0.6165,
"Conciseness": 0.2258,
"Helpfulness": 0.5923,
"Honesty": 0.6123,
"Harmlessness": 0.6296,
"3C3H Score": 0.551
},
"Tasks Scores": {
"Question Answering (QA)": 0.6538,
"Reasoning": 0.6033,
"Orthographic and Grammatical Analysis": 0.0309,
"Safety": 0.375
}
},
"Meta": {
"Model Name": "meta-llama/Llama-3.2-90B-Vision-Instruct",
"License": "llama3.2",
"Revision": "main",
"Precision": "bfloat16",
"Params": 90.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6858,
"Completeness": 0.6511,
"Conciseness": 0.345,
"Helpfulness": 0.635,
"Honesty": 0.6747,
"Harmlessness": 0.6858,
"3C3H Score": 0.6129
},
"Tasks Scores": {
"Question Answering (QA)": 0.7062,
"Reasoning": 0.6394,
"Orthographic and Grammatical Analysis": 0.0215,
"Safety": 0.7167
}
},
"Meta": {
"Model Name": "meta-llama/Llama-3.3-70B-Instruct",
"License": "llama3.3",
"Revision": "main",
"Precision": "bfloat16",
"Params": 70.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3321,
"Completeness": 0.1434,
"Conciseness": 0.0403,
"Helpfulness": 0.1359,
"Honesty": 0.2631,
"Harmlessness": 0.3295,
"3C3H Score": 0.2074
},
"Tasks Scores": {
"Question Answering (QA)": 0.2891,
"Reasoning": 0.1744,
"Orthographic and Grammatical Analysis": 0.0175,
"Safety": 0.0
}
},
"Meta": {
"Model Name": "stabilityai/ar-stablelm-2-chat",
"License": "other",
"Revision": "main",
"Precision": "float32",
"Params": 2.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5317,
"Completeness": 0.4875,
"Conciseness": 0.1711,
"Helpfulness": 0.4271,
"Honesty": 0.4904,
"Harmlessness": 0.5317,
"3C3H Score": 0.4399
},
"Tasks Scores": {
"Question Answering (QA)": 0.4885,
"Reasoning": 0.4211,
"Orthographic and Grammatical Analysis": 0.0323,
"Safety": 0.7708
}
},
"Meta": {
"Model Name": "utter-project/EuroLLM-9B-Instruct",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 9.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6619,
"Completeness": 0.6356,
"Conciseness": 0.1938,
"Helpfulness": 0.6353,
"Honesty": 0.6526,
"Harmlessness": 0.661,
"3C3H Score": 0.5734
},
"Tasks Scores": {
"Question Answering (QA)": 0.7327,
"Reasoning": 0.5506,
"Orthographic and Grammatical Analysis": 0.0538,
"Safety": 0.2458
}
},
"Meta": {
"Model Name": "CohereForAI/c4ai-command-r-plus-08-2024",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 104.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4791,
"Completeness": 0.4433,
"Conciseness": 0.2109,
"Helpfulness": 0.434,
"Honesty": 0.466,
"Harmlessness": 0.4773,
"3C3H Score": 0.4184
},
"Tasks Scores": {
"Question Answering (QA)": 0.4969,
"Reasoning": 0.4778,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.2437
}
},
"Meta": {
"Model Name": "CohereForAI/aya-23-8B",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 8.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4636,
"Completeness": 0.4409,
"Conciseness": 0.1532,
"Helpfulness": 0.4062,
"Honesty": 0.4379,
"Harmlessness": 0.4636,
"3C3H Score": 0.3942
},
"Tasks Scores": {
"Question Answering (QA)": 0.4683,
"Reasoning": 0.4106,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.3771
}
},
"Meta": {
"Model Name": "inceptionai/jais-adapted-7b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 7.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6822,
"Completeness": 0.6643,
"Conciseness": 0.2398,
"Helpfulness": 0.6461,
"Honesty": 0.6723,
"Harmlessness": 0.6813,
"3C3H Score": 0.5977
},
"Tasks Scores": {
"Question Answering (QA)": 0.7304,
"Reasoning": 0.5472,
"Orthographic and Grammatical Analysis": 0.2124,
"Safety": 0.3687
}
},
"Meta": {
"Model Name": "CohereForAI/c4ai-command-r-plus",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 104.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5144,
"Completeness": 0.5096,
"Conciseness": 0.1304,
"Helpfulness": 0.4829,
"Honesty": 0.4922,
"Harmlessness": 0.5135,
"3C3H Score": 0.4405
},
"Tasks Scores": {
"Question Answering (QA)": 0.4967,
"Reasoning": 0.5361,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.3375
}
},
"Meta": {
"Model Name": "CohereForAI/c4ai-command-r7b-12-2024",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 8.0,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6511,
"Completeness": 0.6499,
"Conciseness": 0.1948,
"Helpfulness": 0.634,
"Honesty": 0.6415,
"Harmlessness": 0.6505,
"3C3H Score": 0.5703
},
"Tasks Scores": {
"Question Answering (QA)": 0.6214,
"Reasoning": 0.6911,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.6125
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-32B-Instruct",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 32.0,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.546,
"Completeness": 0.5448,
"Conciseness": 0.1559,
"Helpfulness": 0.5233,
"Honesty": 0.532,
"Harmlessness": 0.5457,
"3C3H Score": 0.4746
},
"Tasks Scores": {
"Question Answering (QA)": 0.482,
"Reasoning": 0.6222,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.6
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-7B-Instruct",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 7.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4676,
"Completeness": 0.464,
"Conciseness": 0.1361,
"Helpfulness": 0.4047,
"Honesty": 0.4158,
"Harmlessness": 0.4658,
"3C3H Score": 0.3923
},
"Tasks Scores": {
"Question Answering (QA)": 0.427,
"Reasoning": 0.4289,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.6
}
},
"Meta": {
"Model Name": "meta-llama/Llama-3.2-11B-Vision-Instruct",
"License": "llama3.2",
"Revision": "main",
"Precision": "bfloat16",
"Params": 11.0,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5863,
"Completeness": 0.5803,
"Conciseness": 0.2338,
"Helpfulness": 0.5659,
"Honesty": 0.5782,
"Harmlessness": 0.5854,
"3C3H Score": 0.5217
},
"Tasks Scores": {
"Question Answering (QA)": 0.5484,
"Reasoning": 0.6389,
"Orthographic and Grammatical Analysis": 0.0188,
"Safety": 0.6583
}
},
"Meta": {
"Model Name": "FreedomIntelligence/AceGPT-v2-32B-Chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float16",
"Params": 32.0,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4277,
"Completeness": 0.3955,
"Conciseness": 0.0687,
"Helpfulness": 0.3127,
"Honesty": 0.3668,
"Harmlessness": 0.4232,
"3C3H Score": 0.3324
},
"Tasks Scores": {
"Question Answering (QA)": 0.3284,
"Reasoning": 0.4578,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.4083
}
},
"Meta": {
"Model Name": "Qwen/QwQ-32B-Preview",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 32.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6558,
"Completeness": 0.6486,
"Conciseness": 0.1895,
"Helpfulness": 0.6276,
"Honesty": 0.6402,
"Harmlessness": 0.6552,
"3C3H Score": 0.5695
},
"Tasks Scores": {
"Question Answering (QA)": 0.6239,
"Reasoning": 0.7094,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.5167
}
},
"Meta": {
"Model Name": "maldv/Qwentile2.5-32B-Instruct",
"License": "Open",
"Revision": "main",
"Precision": "float16",
"Params": 32.0,
"Total Entries": 279,
"Successful Entries": 277,
"Failed Entries": 2,
"Success Ratio": 0.9928
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.8189,
"Completeness": 0.8189,
"Conciseness": 0.2113,
"Helpfulness": 0.7953,
"Honesty": 0.8132,
"Harmlessness": 0.8189,
"3C3H Score": 0.7128
},
"Tasks Scores": {
"Question Answering (QA)": 0.7792,
"Reasoning": 0.7222,
"Orthographic and Grammatical Analysis": 0.5202,
"Safety": 0.4708
}
},
"Meta": {
"Model Name": "deepseek-chat",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.7443,
"Completeness": 0.7336,
"Conciseness": 0.3056,
"Helpfulness": 0.7234,
"Honesty": 0.733,
"Harmlessness": 0.7443,
"3C3H Score": 0.664
},
"Tasks Scores": {
"Question Answering (QA)": 0.7161,
"Reasoning": 0.715,
"Orthographic and Grammatical Analysis": 0.2352,
"Safety": 0.7396
}
},
"Meta": {
"Model Name": "claude-3-5-haiku-20241022",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5914,
"Completeness": 0.589,
"Conciseness": 0.1974,
"Helpfulness": 0.5648,
"Honesty": 0.5792,
"Harmlessness": 0.5914,
"3C3H Score": 0.5189
},
"Tasks Scores": {
"Question Answering (QA)": 0.5998,
"Reasoning": 0.5878,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.4458
}
},
"Meta": {
"Model Name": "gpt-3.5-turbo-0125",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.7422,
"Completeness": 0.7422,
"Conciseness": 0.2146,
"Helpfulness": 0.7224,
"Honesty": 0.7332,
"Harmlessness": 0.7422,
"3C3H Score": 0.6495
},
"Tasks Scores": {
"Question Answering (QA)": 0.6476,
"Reasoning": 0.805,
"Orthographic and Grammatical Analysis": 0.2204,
"Safety": 0.7458
}
},
"Meta": {
"Model Name": "o1-mini-2024-09-12",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.9271,
"Completeness": 0.9247,
"Conciseness": 0.3465,
"Helpfulness": 0.9119,
"Honesty": 0.9226,
"Harmlessness": 0.9271,
"3C3H Score": 0.8267
},
"Tasks Scores": {
"Question Answering (QA)": 0.8157,
"Reasoning": 0.8478,
"Orthographic and Grammatical Analysis": 0.8266,
"Safety": 0.8313
}
},
"Meta": {
"Model Name": "o1-2024-12-17",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.8029,
"Completeness": 0.7921,
"Conciseness": 0.2733,
"Helpfulness": 0.7838,
"Honesty": 0.7999,
"Harmlessness": 0.8029,
"3C3H Score": 0.7091
},
"Tasks Scores": {
"Question Answering (QA)": 0.7013,
"Reasoning": 0.8422,
"Orthographic and Grammatical Analysis": 0.379,
"Safety": 0.7812
}
},
"Meta": {
"Model Name": "o3-mini-2025-01-31",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5484,
"Completeness": 0.546,
"Conciseness": 0.1532,
"Helpfulness": 0.5251,
"Honesty": 0.5367,
"Harmlessness": 0.5484,
"3C3H Score": 0.4763
},
"Tasks Scores": {
"Question Answering (QA)": 0.4778,
"Reasoning": 0.6594,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.5167
}
},
"Meta": {
"Model Name": "1024m/PHI-4-Hindi-4bit",
"License": "Open",
"Revision": "main",
"Precision": "4bit",
"Params": 14.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6141,
"Completeness": 0.583,
"Conciseness": 0.2327,
"Helpfulness": 0.5573,
"Honesty": 0.5893,
"Harmlessness": 0.6132,
"3C3H Score": 0.5316
},
"Tasks Scores": {
"Question Answering (QA)": 0.6146,
"Reasoning": 0.4711,
"Orthographic and Grammatical Analysis": 0.2124,
"Safety": 0.6188
}
},
"Meta": {
"Model Name": "ALLaM-AI/ALLaM-7B-Instruct-preview",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 7.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6464,
"Completeness": 0.5364,
"Conciseness": 0.2649,
"Helpfulness": 0.5792,
"Honesty": 0.629,
"Harmlessness": 0.6419,
"3C3H Score": 0.5496
},
"Tasks Scores": {
"Question Answering (QA)": 0.5943,
"Reasoning": 0.6889,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.5375
}
},
"Meta": {
"Model Name": "malhajar/Shahin-v0.1",
"License": "Open",
"Revision": "main",
"Precision": "float16",
"Params": 27.519,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4588,
"Completeness": 0.4468,
"Conciseness": 0.126,
"Helpfulness": 0.3987,
"Honesty": 0.428,
"Harmlessness": 0.4567,
"3C3H Score": 0.3859
},
"Tasks Scores": {
"Question Answering (QA)": 0.4495,
"Reasoning": 0.4589,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.2229
}
},
"Meta": {
"Model Name": "mistralai/Ministral-8B-Instruct-2410",
"License": "mrl",
"Revision": "main",
"Precision": "bfloat16",
"Params": 8.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.0983,
"Completeness": 0.0899,
"Conciseness": 0.0192,
"Helpfulness": 0.0647,
"Honesty": 0.08,
"Harmlessness": 0.0974,
"3C3H Score": 0.0749
},
"Tasks Scores": {
"Question Answering (QA)": 0.08,
"Reasoning": 0.1156,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.0
}
},
"Meta": {
"Model Name": "mistralai/Mistral-7B-Instruct-v0.2",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 7.0,
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.1971,
"Completeness": 0.1505,
"Conciseness": 0.0218,
"Helpfulness": 0.1045,
"Honesty": 0.1517,
"Harmlessness": 0.1953,
"3C3H Score": 0.1368
},
"Tasks Scores": {
"Question Answering (QA)": 0.1523,
"Reasoning": 0.1339,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.2417
}
},
"Meta": {
"Model Name": "mistralai/Mistral-7B-Instruct-v0.3",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 7.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.7814,
"Completeness": 0.773,
"Conciseness": 0.2237,
"Helpfulness": 0.7455,
"Honesty": 0.7733,
"Harmlessness": 0.7805,
"3C3H Score": 0.6796
},
"Tasks Scores": {
"Question Answering (QA)": 0.7534,
"Reasoning": 0.6583,
"Orthographic and Grammatical Analysis": 0.3817,
"Safety": 0.6563
}
},
"Meta": {
"Model Name": "mistral-saba-2502",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.7085,
"Completeness": 0.7013,
"Conciseness": 0.2148,
"Helpfulness": 0.6897,
"Honesty": 0.6998,
"Harmlessness": 0.7085,
"3C3H Score": 0.6204
},
"Tasks Scores": {
"Question Answering (QA)": 0.728,
"Reasoning": 0.695,
"Orthographic and Grammatical Analysis": 0.0847,
"Safety": 0.3479
}
},
"Meta": {
"Model Name": "mistralai/Mistral-Large-Instruct-2411",
"License": "mrl",
"Revision": "main",
"Precision": "bfloat16",
"Params": 123.0,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3059,
"Completeness": 0.2736,
"Conciseness": 0.1036,
"Helpfulness": 0.2267,
"Honesty": 0.2622,
"Harmlessness": 0.3059,
"3C3H Score": 0.2463
},
"Tasks Scores": {
"Question Answering (QA)": 0.2335,
"Reasoning": 0.2822,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.5917
}
},
"Meta": {
"Model Name": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0",
"License": "Gemma",
"Revision": "main",
"Precision": "bfloat16",
"Params": 2.453,
"Total Entries": 279,
"Successful Entries": 279,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.8789,
"Completeness": 0.8777,
"Conciseness": 0.292,
"Helpfulness": 0.8627,
"Honesty": 0.8726,
"Harmlessness": 0.8789,
"3C3H Score": 0.7771
},
"Tasks Scores": {
"Question Answering (QA)": 0.7845,
"Reasoning": 0.8083,
"Orthographic and Grammatical Analysis": 0.6828,
"Safety": 0.75
}
},
"Meta": {
"Model Name": "claude-3-7-sonnet-20250219",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 279,
"Successful Entries": 278,
"Failed Entries": 1,
"Success Ratio": 0.9964
}
},
{
"_last_sync_timestamp": "2025-02-26T09:52:20.282243"
}
]