djstrong committed on
Commit
cc79ab2
·
1 Parent(s): 0598762

Remove in-progress models

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +6 -1
src/leaderboard/read_evals.py CHANGED
@@ -444,7 +444,8 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
444
  missing_results_for_task = {}
445
  missing_metadata = []
446
  for_run=[]
447
- for v in eval_results.values():
 
448
  r = v.to_dict()
449
  in_progress=False
450
  for task in Tasks:
@@ -464,11 +465,15 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
464
  # print(f'sbatch start.sh "bash eval_model_task_bs1.sh {r["n_shot"]} {task.value.benchmark} {v.full_model}"')
465
  if in_progress:
466
  v.model = '🚧' + v.model
 
467
 
468
  if r[AutoEvalColumn.lang.name] is None or r[AutoEvalColumn.lang.name] == "?":
469
  missing_metadata.append(f"{v.full_model}")
470
  all_models.append((v.full_model, v.num_params, v.still_on_hub))
471
 
 
 
 
472
  results = []
473
  for v in eval_results.values():
474
  try:
 
444
  missing_results_for_task = {}
445
  missing_metadata = []
446
  for_run=[]
447
+ in_progress_models = []
448
+ for k,v in eval_results.items():
449
  r = v.to_dict()
450
  in_progress=False
451
  for task in Tasks:
 
465
  # print(f'sbatch start.sh "bash eval_model_task_bs1.sh {r["n_shot"]} {task.value.benchmark} {v.full_model}"')
466
  if in_progress:
467
  v.model = '🚧' + v.model
468
+ in_progress_models.append(k)
469
 
470
  if r[AutoEvalColumn.lang.name] is None or r[AutoEvalColumn.lang.name] == "?":
471
  missing_metadata.append(f"{v.full_model}")
472
  all_models.append((v.full_model, v.num_params, v.still_on_hub))
473
 
474
+ for k in in_progress_models:
475
+ del eval_results[k]
476
+
477
  results = []
478
  for v in eval_results.values():
479
  try: