Sean Cho committed on
Commit
a507ee8
·
1 Parent(s): 80e30db

Add kocommongen-v2

Browse files
app.py CHANGED
@@ -78,8 +78,8 @@ BENCHMARK_COLS = [
78
  AutoEvalColumn.hellaswag,
79
  AutoEvalColumn.mmlu,
80
  AutoEvalColumn.truthfulqa,
 
81
  # TODO: Uncomment when we have results for these
82
- # AutoEvalColumn.commongen,
83
  # AutoEvalColumn.ethicalverification,
84
  ]
85
  ]
 
78
  AutoEvalColumn.hellaswag,
79
  AutoEvalColumn.mmlu,
80
  AutoEvalColumn.truthfulqa,
81
+ # AutoEvalColumn.commongen_v2,
82
  # TODO: Uncomment when we have results for these
 
83
  # AutoEvalColumn.ethicalverification,
84
  ]
85
  ]
src/display_models/read_results.py CHANGED
@@ -9,14 +9,14 @@ import numpy as np
9
  from src.display_models.utils import AutoEvalColumn, make_clickable_model
10
 
11
  METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
12
- BENCHMARKS = ["ko_arc_challenge", "ko_hellaswag", "ko_mmlu", "ko_truthfulqa_mc"] #, "ko_commongen", "ethicalverification"]
13
  BENCH_TO_NAME = {
14
  "ko_arc_challenge": AutoEvalColumn.arc.name,
15
  "ko_hellaswag": AutoEvalColumn.hellaswag.name,
16
  "ko_mmlu": AutoEvalColumn.mmlu.name,
17
  "ko_truthfulqa_mc": AutoEvalColumn.truthfulqa.name,
 
18
  # TODO: Uncomment when we have results for these
19
- # "ko_commongen": AutoEvalColumn.commongen.name,
20
  # "ethicalverification": AutoEvalColumn.ethicalverification.name,
21
  }
22
 
 
9
  from src.display_models.utils import AutoEvalColumn, make_clickable_model
10
 
11
  METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
12
+ BENCHMARKS = ["ko_arc_challenge", "ko_hellaswag", "ko_mmlu", "ko_truthfulqa_mc", "ko_commongen_v2"] #, "ethicalverification"]
13
  BENCH_TO_NAME = {
14
  "ko_arc_challenge": AutoEvalColumn.arc.name,
15
  "ko_hellaswag": AutoEvalColumn.hellaswag.name,
16
  "ko_mmlu": AutoEvalColumn.mmlu.name,
17
  "ko_truthfulqa_mc": AutoEvalColumn.truthfulqa.name,
18
+ "ko_commongen_v2": AutoEvalColumn.commongen_v2.name,
19
  # TODO: Uncomment when we have results for these
 
20
  # "ethicalverification": AutoEvalColumn.ethicalverification.name,
21
  }
22
 
src/display_models/utils.py CHANGED
@@ -29,8 +29,8 @@ class AutoEvalColumn: # Auto evals column
29
  hellaswag = ColumnContent("Ko-HellaSwag", "number", True)
30
  mmlu = ColumnContent("Ko-MMLU", "number", True)
31
  truthfulqa = ColumnContent("Ko-TruthfulQA", "number", True)
 
32
  # TODO: Uncomment when we have results for these
33
- # commongen = ColumnContent("Ko-CommonGen", "number", True)
34
  # ethicalverification = ColumnContent("EthicalVerification", "number", True)
35
  model_type = ColumnContent("Type", "str", False)
36
  precision = ColumnContent("Precision", "str", False) # , True)
 
29
  hellaswag = ColumnContent("Ko-HellaSwag", "number", True)
30
  mmlu = ColumnContent("Ko-MMLU", "number", True)
31
  truthfulqa = ColumnContent("Ko-TruthfulQA", "number", True)
32
+ commongen_v2 = ColumnContent("Ko-CommonGen V2", "number", True)
33
  # TODO: Uncomment when we have results for these
 
34
  # ethicalverification = ColumnContent("EthicalVerification", "number", True)
35
  model_type = ColumnContent("Type", "str", False)
36
  precision = ColumnContent("Precision", "str", False) # , True)
src/load_from_hub.py CHANGED
@@ -79,7 +79,7 @@ def get_leaderboard_df(
79
  all_data.append(gpt4_values)
80
  all_data.append(gpt35_values)
81
 
82
- all_data.append(baseline)
83
  apply_metadata(all_data) # Populate model type based on known hardcoded values in `metadata.py`
84
 
85
  df = pd.DataFrame.from_records(all_data)
 
79
  all_data.append(gpt4_values)
80
  all_data.append(gpt35_values)
81
 
82
+ # all_data.append(baseline)
83
  apply_metadata(all_data) # Populate model type based on known hardcoded values in `metadata.py`
84
 
85
  df = pd.DataFrame.from_records(all_data)