# Spaces: Running
"""A Gradio app that renders a static leaderboard. This is used for a Hugging Face Space."""
import argparse
import ast
import glob
import json
import pickle
from datetime import datetime, timezone
from pathlib import Path

import gradio as gr
import numpy as np
import pandas as pd

from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS
from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message
# Date string interpolated into the "Last updated on ..." line of the page.
LAST_UPDATED = "Feb 27th 2024"
# Custom stylesheet handed to gr.Blocks(css=...): sets the font size of
# elements with the "markdown-text" class and centres table header/body cells.
css = """
.markdown-text{font-size: 16pt}
th {
text-align: center;
}
td {
font-size: 16px; /* Adjust the font size as needed */
text-align: center;
}
"""
# Maps column names of the loaded results table to the headers shown in the UI
# (applied with DataFrame.rename before the table is displayed).
column_names = {
    "model": "Model",
    # NOTE(review): the characters after "All" look like a mis-encoded emoji;
    # the intended symbol cannot be determined from this file, so the text is
    # left untouched. Confirm against the upstream source.
    "Overall": "All π―",
    # Keycap suffix written as escapes (VARIATION SELECTOR-16 followed by
    # COMBINING ENCLOSING KEYCAP) so the label survives lossy re-encoding.
    "Turn 1": "Turn 1\ufe0f\u20e3",
    "Turn 2": "Turn 2\ufe0f\u20e3",
}
# Per-model display metadata, passed to make_clickable_model together with each
# value of the "model" column. Each entry carries:
#   "hf_name"     - a Hub repository id, or a full URL for the API-only models
#   "pretty_name" - a human-readable label
# NOTE(review): keys presumably have to match the "model" values in the results
# file exactly (including case) - verify against make_clickable_model.
model_info = {
    "gpt-4": {"hf_name": "https://platform.openai.com/", "pretty_name": "gpt-4"},
    "gpt-3.5-turbo": {"hf_name": "https://platform.openai.com/", "pretty_name": "gpt-3.5-turbo"},
    "Llama-2-70b-hf": {"hf_name": "meta-llama/Llama-2-70b-hf", "pretty_name": "Llama-2-70B"},
    "Llama-2-13b-hf": {"hf_name": "meta-llama/Llama-2-13b-hf", "pretty_name": "Llama-2-13B"},
    "Llama-2-7b-hf": {"hf_name": "meta-llama/Llama-2-7b-hf", "pretty_name": "Llama-2-7B"},
    "Mixtral-8x7B-v0.1": {"hf_name": "mistralai/Mixtral-8x7B-v0.1", "pretty_name": "Mixtral-8x7B"},
    "Mistral-7b-v0.1": {"hf_name": "mistralai/Mistral-7B-v0.1", "pretty_name": "Mistral-7B"},
    "Yi-34B": {"hf_name": "01-ai/Yi-34B", "pretty_name": "Yi-34B"},
    "Yi-6B": {"hf_name": "01-ai/Yi-6B", "pretty_name": "Yi-6B"},
    "gemma-7b": {"hf_name": "google/gemma-7b", "pretty_name": "Gemma-7B"},
    "gemma-2b": {"hf_name": "google/gemma-2b", "pretty_name": "Gemma-2B"},
    "phi-2": {"hf_name": "microsoft/phi-2", "pretty_name": "Phi-2 (2.7B)"},
    "olmo": {"hf_name": "allenai/OLMo-7B", "pretty_name": "OLMo-7B"},
}
def formatter(x):
    """Format a single table cell for display.

    Strings are returned unchanged; anything that supports ``round`` is
    rounded to two decimal places. Values that cannot be rounded (for
    example ``None`` in a sparsely filled column) are passed through as-is
    rather than aborting the whole column conversion.

    Args:
        x: The cell value.

    Returns:
        The value to display in the table.
    """
    if isinstance(x, str):
        return x
    try:
        return round(x, 2)
    except TypeError:
        # Not a roundable value; leave it untouched.
        return x
def build_demo(original_df, TYPES):
    """Assemble the Gradio Blocks UI for the static leaderboard.

    The page consists of an introduction, a tab set (leaderboard table and
    metrics description), a "last updated" line and a collapsible citation
    box.

    Args:
        original_df: Table passed as the value of the leaderboard Dataframe.
        TYPES: Column datatypes forwarded to ``gr.components.Dataframe``.

    Returns:
        The constructed ``gr.Blocks`` object; launching it is left to the
        caller.
    """
    # NOTE(review): the emoji prefixes in the tab/accordion labels below appear
    # mis-encoded and cannot be recovered from this file - confirm upstream.
    # NOTE(review): both tabs use the same elem_id; kept as-is because external
    # CSS may rely on it.
    with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        with gr.Tabs(elem_classes="tab-buttons"):
            with gr.TabItem("π Leaderboard", elem_id="od-benchmark-tab-table", id=0):
                gr.components.Dataframe(
                    value=original_df,
                    datatype=TYPES,
                    height=1000,
                    wrap=False,
                    elem_id="leaderboard-table",
                    interactive=False,
                    visible=True,
                    min_width=60,
                )
            with gr.TabItem("π Metrics", elem_id="od-benchmark-tab-table", id=1):
                gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")

        # Page-level footer, placed below the tabs.
        gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")

        with gr.Row():
            with gr.Accordion("π Citation", open=False):
                gr.Textbox(
                    value=CITATION_TEXT,
                    lines=7,
                    label="Copy the BibTeX to cite URIAL and MT-Bench",
                    elem_id="citation-button",
                    show_copy_button=True,
                )
    return demo
if __name__ == "__main__":
    # Script entry point: load the results table, prepare it for display and
    # serve the leaderboard UI.
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--result_file", help="Path to results table", default="leaderboard_data.jsonl")
    args = parser.parse_args()

    # The results file is JSON Lines: one record per line.
    original_df = pd.read_json(args.result_file, lines=True)
    print(original_df.columns)

    for col in original_df.columns:
        if col == "model":
            # Swap the raw model name for whatever make_clickable_model renders.
            original_df[col] = original_df[col].apply(lambda name: make_clickable_model(name, model_info))
        else:
            original_df[col] = original_df[col].apply(formatter)  # For numerical values

    # Keep the first column in place, put 'Overall' second, then the rest.
    first_col = original_df.columns[0]
    new_order = [first_col, "Overall"] + [
        col for col in original_df.columns if col not in (first_col, "Overall")
    ]

    # Chained, non-inplace calls produce a fresh frame, so nothing is mutated
    # through a column-sliced view (avoids pandas' SettingWithCopyWarning).
    reordered_df = (
        original_df[new_order]
        .sort_values(by="Overall", ascending=False)
        .rename(columns=column_names)
    )

    # One datatype per displayed column: markdown for the leading column,
    # number for every remaining column.
    TYPES = ["markdown"] + ["number"] * (len(reordered_df.columns) - 1)

    demo = build_demo(reordered_df, TYPES)
    demo.launch(share=args.share)