File size: 17,314 Bytes
cd2355c 278fc7f 867b5a3 cd2355c 53b6e2c cd2355c 278fc7f 867b5a3 0090c02 278fc7f 3a2452f cd2355c 278fc7f 491fabd 278fc7f 2c70658 d1b5244 2c70658 46f3e87 afc700f 2c70658 46f3e87 cd2355c 94e6643 16d04aa 94e6643 278fc7f cd2355c b2842f3 cd2355c 278fc7f cd2355c 278fc7f 867b5a3 cd2355c 867b5a3 cd2355c 333d5d7 cd2355c 333d5d7 278fc7f cd2355c 278fc7f cd2355c adcaec0 9b48a57 47f3547 9e3047b 45e90ed 97a5917 47f3547 cd2355c 47f3547 278fc7f 867b5a3 dd7478e 867b5a3 278fc7f 47f3547 adcaec0 278fc7f cd2355c 867b5a3 cd2355c 278fc7f cd2355c 278fc7f cd2355c 278fc7f cd2355c afc700f cd2355c 278fc7f b3d1484 f35bc97 cd2355c 278fc7f cd2355c 278fc7f cd2355c 278fc7f cd2355c 278fc7f cd2355c 278fc7f cd2355c 867b5a3 cd2355c 278fc7f cd2355c 278fc7f cd2355c 98506a2 f35bc97 cd2355c 2c70658 cd2355c 278fc7f 5d799e5 278fc7f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 |
import pandas as pd
import requests
import os
import gradio
import gradio as gr
from info.train_a_model import (
LLM_BENCHMARKS_TEXT)
from info.submit import (
SUBMIT_TEXT)
from info.deployment import (
DEPLOY_TEXT)
from info.programs import (
PROGRAMS_TEXT)
from info.citation import(
CITATION_TEXT)
from info.validated_chat_models import(
VALIDATED_CHAT_MODELS)
from info.about import(
ABOUT)
from src.processing import filter_benchmarks_table
# Runtime configuration read from environment variables. Indexing os.environ
# raises KeyError while the module is loading if any of these is unset.
inference_endpoint_url = os.environ['inference_endpoint_url']
submission_form_endpoint_url = os.environ['submission_form_endpoint_url']
# NOTE(review): os.environ values are strings, and this name is not referenced
# by any active code visible in this file — confirm whether it is still needed.
inference_concurrency_limit = os.environ['inference_concurrency_limit']
# Top-level Gradio Blocks container; it is opened as a context manager on the next line.
demo = gr.Blocks()
with demo:
# Page title rendered as raw HTML.
# NOTE(review): the symbols around the title (and other non-ASCII symbols in the
# strings below) look mis-encoded — presumably emoji originally; confirm the file encoding.
gr.HTML("""<h1 align="center" id="space-title">π€Powered-by-Intel LLM Leaderboard π»</h1>""")
# Introductory description of the leaderboard and of how to submit a model.
gr.Markdown("""This leaderboard is designed to evaluate, score, and rank open-source LLMs
that have been pre-trained or fine-tuned on Intel Hardware π¦Ύ. To submit your model for evaluation,
follow the instructions and complete the form in the ποΈ Submit tab. Models submitted to the leaderboard are evaluated
on the Intel Developer Cloud βοΈ. The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from
the [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness).""")
# Acknowledgement of the Open LLM Leaderboard team.
# NOTE(review): the link host is written "huggingface.co." (with a trailing dot) — confirm this is intended.
gr.Markdown("""A special shout-out to the π€ [Open LLM Leaderboard](https://huggingface.co./spaces/HuggingFaceH4/open_llm_leaderboard)
team for generously sharing their code and best
practices, ensuring that AI Developers have a valuable and enjoyable tool at their disposal.""")
def submit_to_endpoint(model_name, revision_name, model_type, hw_type, terms, precision, weight_type, training_infra, affiliation, base_model):
    """Validate the submission form and POST it to the submission endpoint.

    Args:
        model_name: Hub name of the model to evaluate.
        revision_name: Revision (branch/commit) of the model.
        model_type: Model type selected in the form.
        hw_type: Training hardware selected in the form.
        terms: Whether the submitter agreed to the evaluation terms.
        precision: Model precision selected in the form.
        weight_type: Weights type selected in the form.
        training_infra: Training infrastructure selected in the form.
        affiliation: Intel program affiliation selected in the form.
        base_model: Base model name entered in the form.

    Returns:
        str: A human-readable status message. Failures (incomplete form,
        non-200 response, request error) are reported in the returned
        string rather than raised.
    """
    # Construct the data payload to send
    data = {
        "model_name": model_name,
        "revision_name": revision_name,
        "model_type": model_type,
        "hw_type": hw_type,
        "terms": terms,
        "precision": precision,
        "weight_type": weight_type,
        "training_infrastructure": training_infra,
        "affiliation": affiliation,
        "base_model": base_model,
    }

    # Reject incomplete forms before doing any network work. A cleared
    # component can yield None and a text box can hold only whitespace;
    # both are treated the same as an empty string.
    for key, value in data.items():
        is_blank = value is None or (isinstance(value, str) and not value.strip())
        terms_declined = key == "terms" and not value
        if is_blank or terms_declined:
            return f"❌ Failed Submission: '{key}' ensure all fields are completed and that you have agreed to evaluation terms."

    # URL of the endpoint expecting the HTTP request
    url = submission_form_endpoint_url
    # Without a timeout, requests waits indefinitely on a stalled endpoint
    # and the UI worker handling this click never returns.
    timeout_seconds = 30

    try:
        response = requests.post(url, json=data, timeout=timeout_seconds)
    except requests.RequestException as e:
        return f"❌Failed to submit due to an error: {str(e)}"

    if response.status_code == 200:
        return (
            "✅ Submission successful! Please allow for 5 - 10 days for model "
            "evaluation to be completed. We will contact you "
            "through your model's discussion forum if we encounter any issues "
            "with your submission."
        )
    return f"Submission failed with status code {response.status_code}"
#with gr.Accordion("Chat with Top Models on the Leaderboard Here π¬", open=False):
#
# chat_model_dropdown = gr.Dropdown(
# choices=VALIDATED_CHAT_MODELS,
# label="Select a leaderboard model to chat with. ",
# multiselect=False,
# value=VALIDATED_CHAT_MODELS[0],
# interactive=True,
# )
#
# #chat_model_selection = chat_model_dropdown.value
# chat_model_selection = 'yuriachermann/My_AGI_llama_2_7B'
#
# def call_api_and_stream_response(query, chat_model):
# """
# Call the API endpoint and yield characters as they are received.
# This function simulates streaming by yielding characters one by one.
# """
# url = inference_endpoint_url
# params = {"query": query, "selected_model": chat_model}
# with requests.get(url, json=params, stream=True) as r: # Use params for query parameters
# for chunk in r.iter_content(chunk_size=1):
# if chunk:
# yield chunk.decode()
#
# def get_response(query, history):
# """
# Wrapper function to call the streaming API and compile the response.
# """
# response = ''
# for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
# if char == '<': # This is stopping condition; adjust as needed.
# break
# response += char
# yield [(f"π€ Response from LLM: {chat_model_selection}", response)] # Correct format for Gradio Chatbot
##
#
# chatbot = gr.Chatbot()
# msg = gr.Textbox()
# submit = gr.Button("Submit")
# clear = gr.Button("Clear")
# def user(user_message, history):
# return "", history + [[user_message, None]]
# def clear_chat(*args):
# return [] # Returning an empty list to signify clearing the chat, adjust as per Gradio's capabilities
# submit.click(
# fn=get_response,
# inputs=[msg, chatbot],
# outputs=chatbot
# )
# clear.click(
# fn=clear_chat,
# inputs=None,
# outputs=chatbot
# )
#
# Tab container for the app's pages; the first tab item is the leaderboard.
# NOTE(review): non-ASCII symbols in the labels below look mis-encoded — presumably emoji originally.
with gr.Tabs(elem_classes="tab-buttons") as tabs:
with gr.TabItem("π LLM Leaderboard", elem_id="llm-benchmark-table", id=0):
with gr.Row():
# First column of filters. Every filter starts with all of its choices
# selected (value is the same list as choices).
with gr.Column():
filter_hw = gr.CheckboxGroup(choices=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
label="Select Training Platform*",
elem_id="compute_platforms",
value=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"])
filter_platform = gr.CheckboxGroup(choices=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"],
label="Training Infrastructure*",
elem_id="training_infra",
value=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"])
filter_affiliation = gr.CheckboxGroup(choices=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"],
label="Intel Program Affiliation",
elem_id="program_affiliation",
value=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"])
# Second column of filters: model size, precision and type.
with gr.Column():
filter_size = gr.CheckboxGroup(choices=[1,2,3,5,7,8,13,35,60,70,100],
label="Model Sizes (Billion of Parameters)",
elem_id="parameter_size",
value=[1,2,3,5,7,8,13,35,60,70,100])
filter_precision = gr.CheckboxGroup(choices=["fp32","fp16","bf16","int8","fp8", "int4"],
label="Model Precision",
elem_id="precision",
value=["fp32","fp16","bf16","int8","fp8", "int4"])
filter_type = gr.CheckboxGroup(choices=["pretrained","fine-tuned","chat-models","merges/moerges"],
label="Model Types",
elem_id="model_types",
value=["pretrained","fine-tuned","chat-models","merges/moerges"])
# CheckboxGroup created with only a label and no choices; the label text is
# the legend for the "Inference Tested" column.
inbox_text = gr.CheckboxGroup(label = """Inference Tested Column Legend: π¨ = Gaudi, π¦ = Xeon, π₯ = GPU Max, π = Core Ultra, π’ = Arc GPU (Please see "βAbout" tab for more info)""")
# formatting model name and adding links
color = '#2f82d4'


def make_clickable(row):
    """Return an HTML anchor linking a leaderboard row to its Hugging Face page.

    Args:
        row: Mapping-like row (for example the Series passed by
            ``DataFrame.apply(..., axis=1)``) whose "Model" entry is used
            both as the link path and as the link text.

    Returns:
        str: An ``<a>`` element that opens in a new tab and is styled with
        the module-level ``color``.
    """
    model_id = row["Model"]
    # The host is the canonical "huggingface.co"; the previous
    # "huggingface.co." carried a stray trailing dot.
    return (
        f'<a href="https://huggingface.co/{model_id}" target="_blank" '
        f'style="color: {color}; text-decoration: underline;">{model_id}</a>'
    )
# Load the leaderboard snapshot, replace each "Model" value with the HTML link
# built by make_clickable, and order rows by descending "Average".
initial_df = (
    pd.read_csv("./status/leaderboard_status_091124.csv")
    .assign(Model=lambda frame: frame.apply(make_clickable, axis=1))
    .sort_values(by='Average', ascending=False)
)
def update_df(hw_selected, platform_selected, affiliation_selected, size_selected, precision_selected, type_selected):
    """Return the leaderboard table narrowed to the given filter selections.

    Each argument is the list of currently selected values of one filter
    control. All of them are forwarded unchanged, together with the
    module-level ``initial_df``, to ``filter_benchmarks_table``, whose
    result is returned as-is.
    """
    selections = {
        "hw_selected": hw_selected,
        "platform_selected": platform_selected,
        "affiliation_selected": affiliation_selected,
        "size_selected": size_selected,
        "precision_selected": precision_selected,
        "type_selected": type_selected,
    }
    return filter_benchmarks_table(df=initial_df, **selections)
# Starting view of the table: run the filter once with every option selected.
initial_filtered_df = update_df(
    hw_selected=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
    platform_selected=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"],
    affiliation_selected=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"],
    size_selected=[1,2,3,5,7,8,13,35,60,70,100],
    precision_selected=["fp32","fp16","bf16","int8","fp8", "int4"],
    type_selected=["pretrained","fine-tuned","chat-models","merges/moerges"],
)

# Column headers of the displayed table. The first two columns are rendered
# as HTML and every remaining column as a plain string.
leaderboard_headers = [
    "Inference Tested", "Model", "Average", "ARC", "HellaSwag", "MMLU",
    "TruthfulQA", "Winogrande", "Training Hardware", "Model Type", "Precision",
    "Size", "Infrastructure", "Affiliation",
]
leaderboard_datatypes = ["html", "html"] + ["str"] * (len(leaderboard_headers) - 2)

gradio_df_display = gr.Dataframe(
    value=initial_filtered_df,
    headers=leaderboard_headers,
    datatype=leaderboard_datatypes,
)

# Every filter triggers the same refresh: re-run update_df with the current
# state of all six filters and write the result into the table.
filter_controls = [filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type]
for control in filter_controls:
    control.change(
        fn=update_df,
        inputs=list(filter_controls),
        outputs=[gradio_df_display],
    )
# Static informational tabs: each one renders a single imported Markdown text constant.
# NOTE(review): non-ASCII symbols in the tab labels look mis-encoded — presumably emoji originally.
with gr.TabItem("π§° Train a Model", elem_id="getting-started", id=1):
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
with gr.TabItem("π Deployment Tips", elem_id="deployment-tips", id=2):
gr.Markdown(DEPLOY_TEXT, elem_classes="markdown-text")
with gr.TabItem("π©βπ» Developer Programs", elem_id="hardward-program", id=3):
gr.Markdown(PROGRAMS_TEXT, elem_classes="markdown-text")
# NOTE(review): this tab uses id=5 but is declared before the Submit tab (id=4) — confirm the intended order.
with gr.TabItem("β About ", elem_id="about", id=5):
gr.Markdown(ABOUT, elem_classes="markdown-text")
# Submit tab: submission instructions followed by the evaluation request form.
with gr.TabItem("ποΈ Submit", elem_id="submit", id=4):
gr.Markdown(SUBMIT_TEXT, elem_classes="markdown-text")
with gr.Row():
gr.Markdown("# Submit Model for Evaluation ποΈ", elem_classes="markdown-text")
with gr.Row():
# First form column: model identity, type, hardware, terms and the submit button.
with gr.Column():
model_name_textbox = gr.Textbox(label="Model name",
info = """ Name of Model in the Hub. For example: 'Intel/neural-chat-7b-v1-1'""",)
# "main" is passed as placeholder, not as value; submit_to_endpoint rejects
# an empty string for any field, so a revision has to be typed in.
revision_name_textbox = gr.Textbox(label="Revision commit (Branch)", placeholder="main")
# NOTE(review): this list offers "chat models" while the leaderboard's
# Model Types filter uses "chat-models" — confirm which spelling is intended.
model_type = gr.Dropdown(
choices=["pretrained","fine-tuned","chat models","merges/moerges"],
label="Model type",
multiselect=False,
value="pretrained",
interactive=True,
)
hw_type = gr.Dropdown(
choices=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
label="Training Hardware",
multiselect=False,
value="Gaudi",
interactive=True,
)
# Unchecked by default; submit_to_endpoint refuses the form while this is False.
terms = gr.Checkbox(
label="Check if you agree to having your model evaluated and published to the leaderboard by our team.",
value=False,
interactive=True,
)
submit_button = gr.Button("π€ Submit Eval π»")
# Markdown area that receives the status string returned by submit_to_endpoint.
submission_result = gr.Markdown()
# Second form column: precision, weights, infrastructure, affiliation and base model.
with gr.Column():
precision = gr.Dropdown(
choices=["fp32","fp16","bf16","int8","fp8", "int4"],
label="Precision",
multiselect=False,
value="fp16",
interactive=True,
)
weight_type = gr.Dropdown(
choices=["Original", "Adapter", "Delta"],
label="Weights type",
multiselect=False,
value="Original",
interactive=True,
info = """ Select the appropriate weights. If you have fine-tuned or adapted a model with PEFT or Delta-Tuning you likely have
LoRA Adapters or Delta Weights.""",
)
training_infra = gr.Dropdown(
choices=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"],
label="Training Infrastructure",
multiselect=False,
value="Intel Developer Cloud",
interactive=True,
info = """ Select the infrastructure that the model was developed on.
Local is the ideal choice for Core Ultra, ARC GPUs, and local data center infrastructure.""",
)
affiliation = gr.Dropdown(
choices=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"],
label="Affiliation with Intel",
multiselect=False,
value="No Affiliation",
interactive=True,
info = """ Select "No Affiliation" if not part of any Intel programs.""",
)
# NOTE(review): the label suggests this is only relevant for delta/adapter
# weights, but submit_to_endpoint rejects any empty field, so it is
# effectively mandatory — confirm intended.
base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
# Clicking the button sends the ten form values, in the order of
# submit_to_endpoint's parameters, and shows the returned message.
submit_button.click(
fn=submit_to_endpoint,
inputs=[model_name_textbox, revision_name_textbox, model_type, hw_type, terms, precision, weight_type, training_infra, affiliation, base_model_name_textbox],
outputs=submission_result)
# Accordion (created with open=False) holding the citation text in a six-line text box.
with gr.Accordion("π Citation", open=False):
citation =gr.Textbox(value = CITATION_TEXT,
lines=6,
label="Use the following to cite this content")
# Centered trademark notice rendered through Markdown as inline HTML.
gr.Markdown("""<div style="display: flex; justify-content: center;"> <p> Intel, the Intel logo and Gaudi are trademarks of Intel Corporation or its subsidiaries.
*Other names and brands may be claimed as the property of others.
</p> </div>""")
# Enable Gradio's request queue, then start the app; share=False means no
# public share link is requested.
demo.queue()
demo.launch(share=False)