Commit
•
b6f3b94
1
Parent(s):
ca2b34f
Improve label of subtasks
Browse files- src/constants.py +35 -35
src/constants.py
CHANGED
@@ -14,51 +14,51 @@ TASKS = {
|
|
14 |
SUBTASKS = {
|
15 |
# "leaderboard_arc_challenge": ["leaderboard_arc_challenge"],
|
16 |
"leaderboard_bbh": [
|
17 |
-
"leaderboard_bbh_boolean_expressions",
|
18 |
-
"leaderboard_bbh_causal_judgement",
|
19 |
-
"leaderboard_bbh_date_understanding",
|
20 |
-
"leaderboard_bbh_disambiguation_qa",
|
21 |
-
"leaderboard_bbh_formal_fallacies",
|
22 |
-
"leaderboard_bbh_geometric_shapes",
|
23 |
-
"leaderboard_bbh_hyperbaton",
|
24 |
-
"leaderboard_bbh_logical_deduction_five_objects",
|
25 |
-
"leaderboard_bbh_logical_deduction_seven_objects",
|
26 |
-
"leaderboard_bbh_logical_deduction_three_objects",
|
27 |
-
"leaderboard_bbh_movie_recommendation",
|
28 |
-
"leaderboard_bbh_navigate",
|
29 |
-
"leaderboard_bbh_object_counting",
|
30 |
-
"leaderboard_bbh_penguins_in_a_table",
|
31 |
-
"leaderboard_bbh_reasoning_about_colored_objects",
|
32 |
-
"leaderboard_bbh_ruin_names",
|
33 |
-
"leaderboard_bbh_salient_translation_error_detection",
|
34 |
-
"leaderboard_bbh_snarks", "leaderboard_bbh_sports_understanding",
|
35 |
-
"leaderboard_bbh_temporal_sequences",
|
36 |
-
"leaderboard_bbh_tracking_shuffled_objects_five_objects",
|
37 |
-
"leaderboard_bbh_tracking_shuffled_objects_seven_objects",
|
38 |
-
"leaderboard_bbh_tracking_shuffled_objects_three_objects",
|
39 |
-
"leaderboard_bbh_web_of_lies",
|
40 |
],
|
41 |
"leaderboard_gpqa": [
|
42 |
"leaderboard_gpqa_extended",
|
43 |
"leaderboard_gpqa_diamond",
|
44 |
"leaderboard_gpqa_main",
|
45 |
],
|
46 |
-
"leaderboard_ifeval": ["leaderboard_ifeval"],
|
47 |
# "leaderboard_math_hard": [
|
48 |
"leaderboard_math": [
|
49 |
-
"leaderboard_math_algebra_hard",
|
50 |
-
"leaderboard_math_counting_and_prob_hard",
|
51 |
-
"leaderboard_math_geometry_hard",
|
52 |
-
"leaderboard_math_intermediate_algebra_hard",
|
53 |
-
"leaderboard_math_num_theory_hard",
|
54 |
-
"leaderboard_math_prealgebra_hard",
|
55 |
-
"leaderboard_math_precalculus_hard",
|
56 |
],
|
57 |
-
"leaderboard_mmlu_pro": ["leaderboard_mmlu_pro"],
|
58 |
"leaderboard_musr": [
|
59 |
-
"leaderboard_musr_murder_mysteries",
|
60 |
-
"leaderboard_musr_object_placements",
|
61 |
-
"leaderboard_musr_team_allocation",
|
62 |
],
|
63 |
}
|
64 |
|
|
|
14 |
# Subtasks of each leaderboard task, keyed by the task id.
#
# Every entry is a ``(display label, subtask id)`` pair, so a list can be
# consumed uniformly, e.g. ``for label, subtask in SUBTASKS[task]``.
# Keep this shape for every entry: a bare string or a longer tuple would
# break two-name unpacking.
# NOTE(review): the GPQA labels below were chosen to match the
# prefix-dropping pattern of the other tasks -- confirm the wording.
SUBTASKS = {
    # "leaderboard_arc_challenge": ["leaderboard_arc_challenge"],
    "leaderboard_bbh": [
        ("Boolean Expressions", "leaderboard_bbh_boolean_expressions"),
        ("Causal Judgment", "leaderboard_bbh_causal_judgement"),
        ("Date Understanding", "leaderboard_bbh_date_understanding"),
        ("Disambiguation QA", "leaderboard_bbh_disambiguation_qa"),
        ("Formal Fallacies Syllogisms Negation", "leaderboard_bbh_formal_fallacies"),
        ("Geometric Shapes", "leaderboard_bbh_geometric_shapes"),
        ("Hyperbaton", "leaderboard_bbh_hyperbaton"),
        ("Logical Deduction (5)", "leaderboard_bbh_logical_deduction_five_objects"),
        ("Logical Deduction (7)", "leaderboard_bbh_logical_deduction_seven_objects"),
        ("Logical Deduction (3)", "leaderboard_bbh_logical_deduction_three_objects"),
        ("Movie Recommendation", "leaderboard_bbh_movie_recommendation"),
        ("Navigate", "leaderboard_bbh_navigate"),
        ("Object Counting", "leaderboard_bbh_object_counting"),
        ("Penguins in a Table", "leaderboard_bbh_penguins_in_a_table"),
        (
            "Reasoning about Colored Objects",
            "leaderboard_bbh_reasoning_about_colored_objects",
        ),
        ("Ruin Names", "leaderboard_bbh_ruin_names"),
        (
            "Salient Translation Error Detection",
            "leaderboard_bbh_salient_translation_error_detection",
        ),
        # Snarks and Sports Understanding are two distinct subtasks; each
        # gets its own (label, id) pair.
        ("Snarks", "leaderboard_bbh_snarks"),
        ("Sports Understanding", "leaderboard_bbh_sports_understanding"),
        ("Temporal Sequences", "leaderboard_bbh_temporal_sequences"),
        (
            "Tracking Shuffled Objects (5)",
            "leaderboard_bbh_tracking_shuffled_objects_five_objects",
        ),
        (
            "Tracking Shuffled Objects (7)",
            "leaderboard_bbh_tracking_shuffled_objects_seven_objects",
        ),
        (
            "Tracking Shuffled Objects (3)",
            "leaderboard_bbh_tracking_shuffled_objects_three_objects",
        ),
        ("Web of Lies", "leaderboard_bbh_web_of_lies"),
    ],
    "leaderboard_gpqa": [
        ("Extended", "leaderboard_gpqa_extended"),
        ("Diamond", "leaderboard_gpqa_diamond"),
        ("Main", "leaderboard_gpqa_main"),
    ],
    "leaderboard_ifeval": [("IFEval", "leaderboard_ifeval")],
    # "leaderboard_math_hard": [
    "leaderboard_math": [
        ("Algebra", "leaderboard_math_algebra_hard"),
        ("Counting and Probability", "leaderboard_math_counting_and_prob_hard"),
        ("Geometry", "leaderboard_math_geometry_hard"),
        ("Intermediate Algebra", "leaderboard_math_intermediate_algebra_hard"),
        ("Number Theory", "leaderboard_math_num_theory_hard"),
        ("Prealgebra", "leaderboard_math_prealgebra_hard"),
        ("Precalculus", "leaderboard_math_precalculus_hard"),
    ],
    "leaderboard_mmlu_pro": [("MMLU-Pro", "leaderboard_mmlu_pro")],
    "leaderboard_musr": [
        ("Murder Mystery", "leaderboard_musr_murder_mysteries"),
        ("Object Placements", "leaderboard_musr_object_placements"),
        ("Team Allocation", "leaderboard_musr_team_allocation"),
    ],
}
|
64 |
|