farimafatahi
commited on
Update tiered_models_data.csv
Browse files- tiered_models_data.csv +22 -22
tiered_models_data.csv
CHANGED
@@ -1,23 +1,23 @@
|
|
1 |
-
tier,model,factuality_score,hallucination_score,avg_tokens,avg_factual_units,avg_undecidable_units,avg_unsupported_units
|
2 |
-
Tier 1: Hard,GPT4-o,75.69,0.64,561.72,23.91,4.61,1.01
|
3 |
-
Tier 1: Hard,Gemini1.5-Pro,73.81,0.68,516.41,22.23,4.47,1.12
|
4 |
-
Tier 1: Hard,Llama3.1-70B-Instruct,70.01,0.89,531.35,27.09,5.67,2.13
|
5 |
-
Tier 1: Hard,Llama3.1-405B-Instruct,68.64,0.93,550.74,26.6,6.15,2.19
|
6 |
-
Tier 1: Hard,Claude-3.5-Sonnet,74.95,0.65,395.77,22.64,4.03,1.19
|
7 |
-
Tier 1: Hard,CommandR+,73.15,0.71,440.93,23.55,4.51,1.4
|
8 |
-
Tier 1: Hard,Mistral-Large-2,75.19,0.67,485.58,23.21,4.09,1.36
|
9 |
-
Tier 2: Moderate,GPT4-o,80.72,0.5,624.67,24.42,3.59,0.89
|
10 |
-
Tier 2: Moderate,Gemini1.5-Pro,78.02,0.57,565.97,22.16,3.71,0.97
|
11 |
-
Tier 2: Moderate,Llama3.1-70B-Instruct,75.76,0.71,607.44,25.35,4.33,1.76
|
12 |
-
Tier 2: Moderate,Llama3.1-405B-Instruct,75.05,0.7,599.3,25.24,4.74,1.41
|
13 |
-
Tier 2: Moderate,Claude-3.5-Sonnet,79.92,0.54,414.32,22.15,3.32,1.09
|
14 |
-
Tier 2: Moderate,CommandR+,80.71,0.52,483.32,24.1,3.17,1.09
|
15 |
-
Tier 2: Moderate,Mistral-Large-2,79.97,0.52,528.44,22.65,3.21,1.02
|
16 |
-
Tier 3: Easy,GPT4-o,91.63,0.26,640.84,29.29,2.01,0.53
|
17 |
-
Tier 3: Easy,Gemini1.5-Pro,89.86,0.31,551.81,25.6,1.88,0.71
|
18 |
-
Tier 3: Easy,Llama3.1-70B-Instruct,89.3,0.33,607.75,31.38,2.08,0.83
|
19 |
-
Tier 3: Easy,Llama3.1-405B-Instruct,86.57,0.4,599.87,30.12,2.88,0.85
|
20 |
-
Tier 3: Easy,Claude-3.5-Sonnet,89.61,0.3,411.2,26.72,1.49,0.81
|
21 |
-
Tier 3: Easy,CommandR+,91.65,0.25,499.06,27.95,1.57,0.54
|
22 |
-
Tier 3: Easy,Mistral-Large-2,92.0,0.25,523.57,27.8,1.8,0.55
|
23 |
|
|
|
1 |
+
tier,model,factuality_score,hallucination_score,avg_tokens,avg_factual_units,avg_undecidable_units,avg_unsupported_units
|
2 |
+
Tier 1: Hard,GPT4-o,75.69,0.64,561.72,23.91,4.61,1.01
|
3 |
+
Tier 1: Hard,Gemini1.5-Pro,73.81,0.68,516.41,22.23,4.47,1.12
|
4 |
+
Tier 1: Hard,Llama3.1-70B-Instruct,70.01,0.89,531.35,27.09,5.67,2.13
|
5 |
+
Tier 1: Hard,Llama3.1-405B-Instruct,68.64,0.93,550.74,26.6,6.15,2.19
|
6 |
+
Tier 1: Hard,Claude-3.5-Sonnet,74.95,0.65,395.77,22.64,4.03,1.19
|
7 |
+
Tier 1: Hard,CommandR+,73.15,0.71,440.93,23.55,4.51,1.4
|
8 |
+
Tier 1: Hard,Mistral-Large-2,75.19,0.67,485.58,23.21,4.09,1.36
|
9 |
+
Tier 2: Moderate,GPT4-o,80.72,0.5,624.67,24.42,3.59,0.89
|
10 |
+
Tier 2: Moderate,Gemini1.5-Pro,78.02,0.57,565.97,22.16,3.71,0.97
|
11 |
+
Tier 2: Moderate,Llama3.1-70B-Instruct,75.76,0.71,607.44,25.35,4.33,1.76
|
12 |
+
Tier 2: Moderate,Llama3.1-405B-Instruct,75.05,0.7,599.3,25.24,4.74,1.41
|
13 |
+
Tier 2: Moderate,Claude-3.5-Sonnet,79.92,0.54,414.32,22.15,3.32,1.09
|
14 |
+
Tier 2: Moderate,CommandR+,80.71,0.52,483.32,24.1,3.17,1.09
|
15 |
+
Tier 2: Moderate,Mistral-Large-2,79.97,0.52,528.44,22.65,3.21,1.02
|
16 |
+
Tier 3: Easy,GPT4-o,91.63,0.26,640.84,29.29,2.01,0.53
|
17 |
+
Tier 3: Easy,Gemini1.5-Pro,89.86,0.31,551.81,25.6,1.88,0.71
|
18 |
+
Tier 3: Easy,Llama3.1-70B-Instruct,89.3,0.33,607.75,31.38,2.08,0.83
|
19 |
+
Tier 3: Easy,Llama3.1-405B-Instruct,86.57,0.4,599.87,30.12,2.88,0.85
|
20 |
+
Tier 3: Easy,Claude-3.5-Sonnet,89.61,0.3,411.2,26.72,1.49,0.81
|
21 |
+
Tier 3: Easy,CommandR+,91.65,0.25,499.06,27.95,1.57,0.54
|
22 |
+
Tier 3: Easy,Mistral-Large-2,92.0,0.25,523.57,27.8,1.8,0.55
|
23 |
|