Spaces:

SUSTech
/

tlem

Running

App Files Files Community

facat committed on Nov 12, 2023

Commit

9199665

1 Parent(s): 075ef98

fix mmlu

Browse files

Files changed (1) hide show

tasks.py +46 -59

tasks.py CHANGED Viewed

@@ -10,6 +10,7 @@ from functools import partial
 from .utils import *
 from evaluate import load
 def fake_pipeline(prompts: Iterable[str]) -> list[str]:
@@ -78,12 +79,12 @@ class Task:
                     + example[self.label_column],
                 }
             )[self.input_column]
-            few_shot_prompts = "\n".join(shots)
             test_ds = test_ds.map(
                 lambda example: {
                     self.input_column: few_shot_prompts
-                    + "\n"
                     + example[self.input_column],
                 }
             )
@@ -177,6 +178,9 @@ class Metrics:
 class CMMLU:
     def prompt_cmmlu(example, chat=False):
         prefix = "以下是一道多项选择题，请从A、B、C和D中选择最合适的答案作为这个问题的答案。\n\n" if chat else "问题："
         prompt = prefix + example["Question"]
@@ -283,31 +287,34 @@ class CMMLU:
         "Test": ["computer science"],
     }
-    finer_categories = (
-        pd.Series(subcategories)  # noqa # type: ignore
-        .explode()
-        .reset_index()
-        .set_index(0)
-        .groupby(0)
-        .agg(list)["index"]
-        .to_dict()
-    )
     @classmethod
     def suite(cls, chat=False):
-        suite = {}
         for k, v in cls.categories.items():
             for subject in v:
-                suite[k] = [
-                    Task(
-                        ("haonan-li/cmmlu", subcategories),
-                        metric_name=("sustech/tlem", "cmmlu"),
-                        input_column="prompt",
-                        label_column="Answer",
-                        prompt=partial(cls.prompt_cmmlu, chat=chat),
-                    )
-                    for subcategories in cls.finer_categories[subject]
-                ]
         return suite
@@ -390,9 +397,6 @@ class MMLU:
     }
     categories = {
-        "Math": [
-            "math",
-        ],
         "STEM": [
             "physics",
             "chemistry",
@@ -409,26 +413,7 @@ class MMLU:
             "geography",
             "psychology",
         ],
-        "Other": ["other", "business", "health"],
-        "All": [
-            "physics",
-            "chemistry",
-            "biology",
-            "computer science",
-            "math",
-            "engineering",
-            "history",
-            "philosophy",
-            "law",
-            "politics",
-            "culture",
-            "economics",
-            "geography",
-            "psychology",
-            "other",
-            "business",
-            "health",
-        ],
         "Test": ["culture"],
     }
@@ -443,19 +428,21 @@ class MMLU:
             .agg(list)["index"]
             .to_dict()
         )
-        suite = {}
         for k, v in cls.categories.items():
             for subject in v:
-                suite[k] = [
-                    Task(
-                        ("lukaemon/mmlu", subcategories),
-                        metric_name=("sustech/tlem", "mmlu"),
-                        input_column=cls.input_column,
-                        label_column=cls.label_column,
-                        prompt=partial(cls.prompt_mmlu, chat=chat),
-                        few_shot=0 if chat else 5,
-                        few_shot_from="validation",
-                    )
-                    for subcategories in finer_categories[subject]
-                ]
         return suite

 from .utils import *
 from evaluate import load
+from collections import defaultdict
 def fake_pipeline(prompts: Iterable[str]) -> list[str]:
                     + example[self.label_column],
                 }
             )[self.input_column]
+            few_shot_prompts = "\n\n".join(shots)
             test_ds = test_ds.map(
                 lambda example: {
                     self.input_column: few_shot_prompts
+                    + "\n\n"
                     + example[self.input_column],
                 }
             )
 class CMMLU:
+    input_column = "prompt"
+    label_column = "Answer"
     def prompt_cmmlu(example, chat=False):
         prefix = "以下是一道多项选择题，请从A、B、C和D中选择最合适的答案作为这个问题的答案。\n\n" if chat else "问题："
         prompt = prefix + example["Question"]
         "Test": ["computer science"],
     }
     @classmethod
     def suite(cls, chat=False):
+        finer_categories = (
+            pd.Series(cls.subcategories)  # noqa # type: ignore
+            .explode()
+            .reset_index()
+            .set_index(0)
+            .groupby(0)
+            .agg(list)["index"]
+            .to_dict()
+        )
+        suite = defaultdict(list)
         for k, v in cls.categories.items():
             for subject in v:
+                suite[k].extend(
+                    [
+                        Task(
+                            ("haonan-li/cmmlu", subcategories),
+                            metric_name=("sustech/tlem", "cmmlu"),
+                            input_column=cls.input_column,
+                            label_column=cls.label_column,
+                            prompt=partial(cls.prompt_cmmlu, chat=chat),
+                            few_shot=0 if chat else 5,
+                            few_shot_from="dev",
+                        )
+                        for subcategories in finer_categories[subject]
+                    ]
+                )
         return suite
     }
     categories = {
         "STEM": [
             "physics",
             "chemistry",
             "geography",
             "psychology",
         ],
+        "other": ["other", "business", "health"],
         "Test": ["culture"],
     }
             .agg(list)["index"]
             .to_dict()
         )
+        suite = defaultdict(list)
         for k, v in cls.categories.items():
             for subject in v:
+                suite[k].extend(
+                    [
+                        Task(
+                            ("lukaemon/mmlu", subcategories),
+                            metric_name=("sustech/tlem", "mmlu"),
+                            input_column=cls.input_column,
+                            label_column=cls.label_column,
+                            prompt=partial(cls.prompt_mmlu, chat=chat),
+                            few_shot=0 if chat else 5,
+                            few_shot_from="validation",
+                        )
+                        for subcategories in finer_categories[subject]
+                    ]
+                )
         return suite