Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Sean Cho
committed on
Commit
·
a507ee8
1
Parent(s):
80e30db
Add kocommongen-v2
Browse files
- app.py +1 -1
- src/display_models/read_results.py +2 -2
- src/display_models/utils.py +1 -1
- src/load_from_hub.py +1 -1
app.py
CHANGED
@@ -78,8 +78,8 @@ BENCHMARK_COLS = [
|
|
78 |
AutoEvalColumn.hellaswag,
|
79 |
AutoEvalColumn.mmlu,
|
80 |
AutoEvalColumn.truthfulqa,
|
|
|
81 |
# TODO: Uncomment when we have results for these
|
82 |
-
# AutoEvalColumn.commongen,
|
83 |
# AutoEvalColumn.ethicalverification,
|
84 |
]
|
85 |
]
|
|
|
78 |
AutoEvalColumn.hellaswag,
|
79 |
AutoEvalColumn.mmlu,
|
80 |
AutoEvalColumn.truthfulqa,
|
81 |
+
# AutoEvalColumn.commongen_v2,
|
82 |
# TODO: Uncomment when we have results for these
|
|
|
83 |
# AutoEvalColumn.ethicalverification,
|
84 |
]
|
85 |
]
|
src/display_models/read_results.py
CHANGED
@@ -9,14 +9,14 @@ import numpy as np
|
|
9 |
from src.display_models.utils import AutoEvalColumn, make_clickable_model
|
10 |
|
11 |
METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
|
12 |
-
BENCHMARKS = ["ko_arc_challenge", "ko_hellaswag", "ko_mmlu", "ko_truthfulqa_mc"] #, "
|
13 |
BENCH_TO_NAME = {
|
14 |
"ko_arc_challenge": AutoEvalColumn.arc.name,
|
15 |
"ko_hellaswag": AutoEvalColumn.hellaswag.name,
|
16 |
"ko_mmlu": AutoEvalColumn.mmlu.name,
|
17 |
"ko_truthfulqa_mc": AutoEvalColumn.truthfulqa.name,
|
|
|
18 |
# TODO: Uncomment when we have results for these
|
19 |
-
# "ko_commongen": AutoEvalColumn.commongen.name,
|
20 |
# "ethicalverification": AutoEvalColumn.ethicalverification.name,
|
21 |
}
|
22 |
|
|
|
9 |
from src.display_models.utils import AutoEvalColumn, make_clickable_model
|
10 |
|
11 |
METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
|
12 |
+
BENCHMARKS = ["ko_arc_challenge", "ko_hellaswag", "ko_mmlu", "ko_truthfulqa_mc", "ko_commongen_v2"] #, "ethicalverification"]
|
13 |
BENCH_TO_NAME = {
|
14 |
"ko_arc_challenge": AutoEvalColumn.arc.name,
|
15 |
"ko_hellaswag": AutoEvalColumn.hellaswag.name,
|
16 |
"ko_mmlu": AutoEvalColumn.mmlu.name,
|
17 |
"ko_truthfulqa_mc": AutoEvalColumn.truthfulqa.name,
|
18 |
+
"ko_commongen_v2": AutoEvalColumn.commongen_v2.name,
|
19 |
# TODO: Uncomment when we have results for these
|
|
|
20 |
# "ethicalverification": AutoEvalColumn.ethicalverification.name,
|
21 |
}
|
22 |
|
src/display_models/utils.py
CHANGED
@@ -29,8 +29,8 @@ class AutoEvalColumn: # Auto evals column
|
|
29 |
hellaswag = ColumnContent("Ko-HellaSwag", "number", True)
|
30 |
mmlu = ColumnContent("Ko-MMLU", "number", True)
|
31 |
truthfulqa = ColumnContent("Ko-TruthfulQA", "number", True)
|
|
|
32 |
# TODO: Uncomment when we have results for these
|
33 |
-
# commongen = ColumnContent("Ko-CommonGen", "number", True)
|
34 |
# ethicalverification = ColumnContent("EthicalVerification", "number", True)
|
35 |
model_type = ColumnContent("Type", "str", False)
|
36 |
precision = ColumnContent("Precision", "str", False) # , True)
|
|
|
29 |
hellaswag = ColumnContent("Ko-HellaSwag", "number", True)
|
30 |
mmlu = ColumnContent("Ko-MMLU", "number", True)
|
31 |
truthfulqa = ColumnContent("Ko-TruthfulQA", "number", True)
|
32 |
+
commongen_v2 = ColumnContent("Ko-CommonGen V2", "number", True)
|
33 |
# TODO: Uncomment when we have results for these
|
|
|
34 |
# ethicalverification = ColumnContent("EthicalVerification", "number", True)
|
35 |
model_type = ColumnContent("Type", "str", False)
|
36 |
precision = ColumnContent("Precision", "str", False) # , True)
|
src/load_from_hub.py
CHANGED
@@ -79,7 +79,7 @@ def get_leaderboard_df(
|
|
79 |
all_data.append(gpt4_values)
|
80 |
all_data.append(gpt35_values)
|
81 |
|
82 |
-
all_data.append(baseline)
|
83 |
apply_metadata(all_data) # Populate model type based on known hardcoded values in `metadata.py`
|
84 |
|
85 |
df = pd.DataFrame.from_records(all_data)
|
|
|
79 |
all_data.append(gpt4_values)
|
80 |
all_data.append(gpt35_values)
|
81 |
|
82 |
+
# all_data.append(baseline)
|
83 |
apply_metadata(all_data) # Populate model type based on known hardcoded values in `metadata.py`
|
84 |
|
85 |
df = pd.DataFrame.from_records(all_data)
|