albertvillanova HF staff committed on
Commit
b6f3b94
1 Parent(s): ca2b34f

Improve label of subtasks

Browse files
Files changed (1) hide show
  1. src/constants.py +35 -35
src/constants.py CHANGED
@@ -14,51 +14,51 @@ TASKS = {
14
  SUBTASKS = {
15
  # "leaderboard_arc_challenge": ["leaderboard_arc_challenge"],
16
  "leaderboard_bbh": [
17
- "leaderboard_bbh_boolean_expressions",
18
- "leaderboard_bbh_causal_judgement",
19
- "leaderboard_bbh_date_understanding",
20
- "leaderboard_bbh_disambiguation_qa",
21
- "leaderboard_bbh_formal_fallacies",
22
- "leaderboard_bbh_geometric_shapes",
23
- "leaderboard_bbh_hyperbaton",
24
- "leaderboard_bbh_logical_deduction_five_objects",
25
- "leaderboard_bbh_logical_deduction_seven_objects",
26
- "leaderboard_bbh_logical_deduction_three_objects",
27
- "leaderboard_bbh_movie_recommendation",
28
- "leaderboard_bbh_navigate",
29
- "leaderboard_bbh_object_counting",
30
- "leaderboard_bbh_penguins_in_a_table",
31
- "leaderboard_bbh_reasoning_about_colored_objects",
32
- "leaderboard_bbh_ruin_names",
33
- "leaderboard_bbh_salient_translation_error_detection",
34
- "leaderboard_bbh_snarks", "leaderboard_bbh_sports_understanding",
35
- "leaderboard_bbh_temporal_sequences",
36
- "leaderboard_bbh_tracking_shuffled_objects_five_objects",
37
- "leaderboard_bbh_tracking_shuffled_objects_seven_objects",
38
- "leaderboard_bbh_tracking_shuffled_objects_three_objects",
39
- "leaderboard_bbh_web_of_lies",
40
  ],
41
  "leaderboard_gpqa": [
42
  "leaderboard_gpqa_extended",
43
  "leaderboard_gpqa_diamond",
44
  "leaderboard_gpqa_main",
45
  ],
46
- "leaderboard_ifeval": ["leaderboard_ifeval"],
47
  # "leaderboard_math_hard": [
48
  "leaderboard_math": [
49
- "leaderboard_math_algebra_hard",
50
- "leaderboard_math_counting_and_prob_hard",
51
- "leaderboard_math_geometry_hard",
52
- "leaderboard_math_intermediate_algebra_hard",
53
- "leaderboard_math_num_theory_hard",
54
- "leaderboard_math_prealgebra_hard",
55
- "leaderboard_math_precalculus_hard",
56
  ],
57
- "leaderboard_mmlu_pro": ["leaderboard_mmlu_pro"],
58
  "leaderboard_musr": [
59
- "leaderboard_musr_murder_mysteries",
60
- "leaderboard_musr_object_placements",
61
- "leaderboard_musr_team_allocation",
62
  ],
63
  }
64
 
 
14
  SUBTASKS = {
15
  # "leaderboard_arc_challenge": ["leaderboard_arc_challenge"],
16
  "leaderboard_bbh": [
17
+ ("Boolean Expressions", "leaderboard_bbh_boolean_expressions"),
18
+ ("Causal Judgment", "leaderboard_bbh_causal_judgement"),
19
+ ("Date Understanding", "leaderboard_bbh_date_understanding"),
20
+ ("Disambiguation QA", "leaderboard_bbh_disambiguation_qa"),
21
+ ("Formal Fallacies Syllogisms Negation", "leaderboard_bbh_formal_fallacies"),
22
+ ("Geometric Shapes", "leaderboard_bbh_geometric_shapes"),
23
+ ("Hyperbaton", "leaderboard_bbh_hyperbaton"),
24
+ ("Logical Deduction (5)", "leaderboard_bbh_logical_deduction_five_objects"),
25
+ ("Logical Deduction (7)", "leaderboard_bbh_logical_deduction_seven_objects"),
26
+ ("Logical Deduction (3)", "leaderboard_bbh_logical_deduction_three_objects"),
27
+ ("Movie Recommendation", "leaderboard_bbh_movie_recommendation"),
28
+ ("Navigate", "leaderboard_bbh_navigate"),
29
+ ("Object Counting", "leaderboard_bbh_object_counting"),
30
+ ("Penguins in a Table", "leaderboard_bbh_penguins_in_a_table"),
31
+ ("Reasoning about Colored Objects", "leaderboard_bbh_reasoning_about_colored_objects"),
32
+ ("Ruin Names", "leaderboard_bbh_ruin_names"),
33
+ ("Salient Translation Error Detection", "leaderboard_bbh_salient_translation_error_detection"),
34
+ ("Snarks", "leaderboard_bbh_snarks"), ("Sports Understanding", "leaderboard_bbh_sports_understanding"),
35
+ ("Temporal Sequences", "leaderboard_bbh_temporal_sequences"),
36
+ ("Tracking Shuffled Objects (5)", "leaderboard_bbh_tracking_shuffled_objects_five_objects"),
37
+ ("Tracking Shuffled Objects (7)", "leaderboard_bbh_tracking_shuffled_objects_seven_objects"),
38
+ ("Tracking Shuffled Objects (3)", "leaderboard_bbh_tracking_shuffled_objects_three_objects"),
39
+ ("Web of Lies", "leaderboard_bbh_web_of_lies"),
40
  ],
41
  "leaderboard_gpqa": [
42
  "leaderboard_gpqa_extended",
43
  "leaderboard_gpqa_diamond",
44
  "leaderboard_gpqa_main",
45
  ],
46
+ "leaderboard_ifeval": [("IFEval", "leaderboard_ifeval")],
47
  # "leaderboard_math_hard": [
48
  "leaderboard_math": [
49
+ ("Algebra", "leaderboard_math_algebra_hard"),
50
+ ("Counting and Probability", "leaderboard_math_counting_and_prob_hard"),
51
+ ("Geometry", "leaderboard_math_geometry_hard"),
52
+ ("Intermediate Algebra", "leaderboard_math_intermediate_algebra_hard"),
53
+ ("Number Theory", "leaderboard_math_num_theory_hard"),
54
+ ("Prealgebra", "leaderboard_math_prealgebra_hard"),
55
+ ("Precalculus", "leaderboard_math_precalculus_hard"),
56
  ],
57
+ "leaderboard_mmlu_pro": [("MMLU-Pro", "leaderboard_mmlu_pro")],
58
  "leaderboard_musr": [
59
+ ("Murder Mystery", "leaderboard_musr_murder_mysteries"),
60
+ ("Object Placements", "leaderboard_musr_object_placements"),
61
+ ("Team Allocation", "leaderboard_musr_team_allocation"),
62
  ],
63
  }
64