Spaces:
Restarting
on
CPU Upgrade
Restarting
on
CPU Upgrade
Sean Cho
committed on
Commit
•
6313532
1
Parent(s):
150c99b
comment out evaluation fields not ready
Browse files
- app.py +3 -2
- src/display_models/read_results.py +5 -4
- src/display_models/utils.py +3 -2
app.py
CHANGED
@@ -77,8 +77,9 @@ BENCHMARK_COLS = [
|
|
77 |
AutoEvalColumn.hellaswag,
|
78 |
AutoEvalColumn.mmlu,
|
79 |
AutoEvalColumn.truthfulqa,
|
80 |
-
|
81 |
-
AutoEvalColumn.
|
|
|
82 |
]
|
83 |
]
|
84 |
|
|
|
77 |
AutoEvalColumn.hellaswag,
|
78 |
AutoEvalColumn.mmlu,
|
79 |
AutoEvalColumn.truthfulqa,
|
80 |
+
# TODO: Uncomment when we have results for these
|
81 |
+
# AutoEvalColumn.commongen,
|
82 |
+
# AutoEvalColumn.ethicalverification,
|
83 |
]
|
84 |
]
|
85 |
|
src/display_models/read_results.py
CHANGED
@@ -9,14 +9,15 @@ import numpy as np
|
|
9 |
from src.display_models.utils import AutoEvalColumn, make_clickable_model
|
10 |
|
11 |
METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
|
12 |
-
BENCHMARKS = ["ko_arc_challenge", "ko_hellaswag", "ko_mmlu", "ko_truthfulqa_mc", "ko_commongen", "ethicalverification"]
|
13 |
BENCH_TO_NAME = {
|
14 |
"ko_arc_challenge": AutoEvalColumn.arc.name,
|
15 |
"ko_hellaswag": AutoEvalColumn.hellaswag.name,
|
16 |
"ko_mmlu": AutoEvalColumn.mmlu.name,
|
17 |
-
"
|
18 |
-
|
19 |
-
"
|
|
|
20 |
}
|
21 |
|
22 |
|
|
|
9 |
from src.display_models.utils import AutoEvalColumn, make_clickable_model
|
10 |
|
11 |
METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
|
12 |
+
BENCHMARKS = ["ko_arc_challenge", "ko_hellaswag", "ko_mmlu", "ko_truthfulqa_mc"] #, "ko_commongen", "ethicalverification"]
|
13 |
BENCH_TO_NAME = {
|
14 |
"ko_arc_challenge": AutoEvalColumn.arc.name,
|
15 |
"ko_hellaswag": AutoEvalColumn.hellaswag.name,
|
16 |
"ko_mmlu": AutoEvalColumn.mmlu.name,
|
17 |
+
"ko_truthfulqa_mc": AutoEvalColumn.truthfulqa.name,
|
18 |
+
# TODO: Uncomment when we have results for these
|
19 |
+
# "ko_commongen": AutoEvalColumn.commongen.name,
|
20 |
+
# "ethicalverification": AutoEvalColumn.ethicalverification.name,
|
21 |
}
|
22 |
|
23 |
|
src/display_models/utils.py
CHANGED
@@ -29,8 +29,9 @@ class AutoEvalColumn: # Auto evals column
|
|
29 |
hellaswag = ColumnContent("Ko-HellaSwag", "number", True)
|
30 |
mmlu = ColumnContent("Ko-MMLU", "number", True)
|
31 |
truthfulqa = ColumnContent("Ko-TruthfulQA", "number", True)
|
32 |
-
|
33 |
-
|
|
|
34 |
model_type = ColumnContent("Type", "str", False)
|
35 |
precision = ColumnContent("Precision", "str", False) # , True)
|
36 |
license = ColumnContent("Hub License", "str", False)
|
|
|
29 |
hellaswag = ColumnContent("Ko-HellaSwag", "number", True)
|
30 |
mmlu = ColumnContent("Ko-MMLU", "number", True)
|
31 |
truthfulqa = ColumnContent("Ko-TruthfulQA", "number", True)
|
32 |
+
# TODO: Uncomment when we have results for these
|
33 |
+
# commongen = ColumnContent("Ko-CommonGen", "number", True)
|
34 |
+
# ethicalverification = ColumnContent("EthicalVerification", "number", True)
|
35 |
model_type = ColumnContent("Type", "str", False)
|
36 |
precision = ColumnContent("Precision", "str", False) # , True)
|
37 |
license = ColumnContent("Hub License", "str", False)
|