Spaces:
Restarting
Restarting
update to display failed model
Browse files- src/pages/submit.py +12 -0
- src/populate.py +3 -1
src/pages/submit.py
CHANGED
@@ -9,6 +9,7 @@ def show_submit_page(index: int):
|
|
9 |
finished_eval_queue_df,
|
10 |
running_eval_queue_df,
|
11 |
pending_eval_queue_df,
|
|
|
12 |
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
13 |
with gr.TabItem("🚀 Submit! ", elem_id="llm-benchmark-tab-table", id=index):
|
14 |
|
@@ -24,6 +25,17 @@ def show_submit_page(index: int):
|
|
24 |
datatype=EVAL_TYPES,
|
25 |
row_count=5,
|
26 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
with gr.Accordion(
|
29 |
f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
|
|
|
9 |
finished_eval_queue_df,
|
10 |
running_eval_queue_df,
|
11 |
pending_eval_queue_df,
|
12 |
+
failed_eval_queue_df,
|
13 |
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
14 |
with gr.TabItem("🚀 Submit! ", elem_id="llm-benchmark-tab-table", id=index):
|
15 |
|
|
|
25 |
datatype=EVAL_TYPES,
|
26 |
row_count=5,
|
27 |
)
|
28 |
+
with gr.Accordion(
|
29 |
+
f"🔴 Failed Evaluations ({len(failed_eval_queue_df)})",
|
30 |
+
open=False,
|
31 |
+
):
|
32 |
+
with gr.Row():
|
33 |
+
failed_eval_table = gr.components.Dataframe(
|
34 |
+
value=failed_eval_queue_df,
|
35 |
+
headers=EVAL_COLS,
|
36 |
+
datatype=EVAL_TYPES,
|
37 |
+
row_count=5,
|
38 |
+
)
|
39 |
|
40 |
with gr.Accordion(
|
41 |
f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
|
src/populate.py
CHANGED
@@ -45,7 +45,9 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
|
45 |
pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
|
46 |
running_list = [e for e in all_evals if e["status"] == "RUNNING"]
|
47 |
finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
|
|
|
48 |
df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
|
49 |
df_running = pd.DataFrame.from_records(running_list, columns=cols)
|
50 |
df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
|
51 |
-
|
|
|
|
45 |
pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
|
46 |
running_list = [e for e in all_evals if e["status"] == "RUNNING"]
|
47 |
finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
|
48 |
+
failed_list = [e for e in all_evals if e["status"].startswith("FAILED")]
|
49 |
df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
|
50 |
df_running = pd.DataFrame.from_records(running_list, columns=cols)
|
51 |
df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
|
52 |
+
df_failed = pd.DataFrame.from_records(failed_list, columns=cols)
|
53 |
+
return df_finished[cols], df_running[cols], df_pending[cols], df_failed[cols]
|