eduardo-alvarez committed on
Commit
867b5a3
1 Parent(s): 9773d33

enabling chat functionality

app.py CHANGED
@@ -1,8 +1,7 @@
 import gradio as gr
 import pandas as pd
 import requests
-import socket
-
+import os
 
 from info.train_a_model import (
     LLM_BENCHMARKS_TEXT)
@@ -16,10 +15,12 @@ from info.citation import(
     CITATION_TEXT)
 from info.validated_chat_models import(
     VALIDATED_CHAT_MODELS)
+from info.about import(
+    ABOUT)
 from src.processing import filter_benchmarks_table
 
-#inference_endpoint_url = os.environ['inference_endpoint_url']
-#inference_concurrency_limit = os.environ['inference_concurrency_limit']
+inference_endpoint_url = os.environ['inference_endpoint_url']
+inference_concurrency_limit = os.environ['inference_concurrency_limit']
 
 demo = gr.Blocks()
 
@@ -36,22 +37,8 @@ with demo:
     gr.Markdown("""A special shout-out to the 🤗 [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
                 team for generously sharing their code and best
                 practices, ensuring that AI Developers have a valuable and enjoyable tool at their disposal.""")
-
-
-    def get_public_ip():
-        try:
-            response = requests.get('https://api.ipify.org')
-            public_ip = response.text
-            return public_ip
-        except Exception as e:
-            return f"Error: {str(e)}"
-
-    public_ip = get_public_ip()
-
-    gr.Markdown(f"ip: {public_ip}")
 
     with gr.Accordion("Chat with Top Models on the Leaderboard Here 💬", open=False):
-        # import pdb
 
         chat_model_dropdown = gr.Dropdown(
             choices=VALIDATED_CHAT_MODELS,
@@ -64,34 +51,33 @@ with demo:
         #chat_model_selection = chat_model_dropdown.value
         chat_model_selection = 'Intel/neural-chat-7b-v1-1'
 
-        #def call_api_and_stream_response(query, chat_model):
-        #    """
-        #    Call the API endpoint and yield characters as they are received.
-        #    This function simulates streaming by yielding characters one by one.
-        #    """
-        #    url = "http://localhost:5004/query-stream/"
-        #    params = {"query": query,"selected_model":chat_model}
-        #    with requests.get(url, json=params, stream=True) as r:
-        #        for chunk in r.iter_content(chunk_size=1):
-        #            if chunk:
-        #                yield chunk.decode()
-        #
-        #def get_response(query, history):
-        #    """
-        #    Wrapper function to call the streaming API and compile the response.
-        #    """
-        #    response = ''
-        #
-        #    global chat_model_selection
-        #
-        #    for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
-        #        if char == '<':
-        #            break
-        #        response += char
-        #        yield response
-        #
-        #gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=5).launch()
+        def call_api_and_stream_response(query, chat_model):
+            """
+            Call the API endpoint and yield characters as they are received.
+            This function simulates streaming by yielding characters one by one.
+            """
+            url = inference_endpoint_url
+            params = {"query": query,"selected_model":chat_model}
+            with requests.get(url, json=params, stream=True) as r:
+                for chunk in r.iter_content(chunk_size=1):
+                    if chunk:
+                        yield chunk.decode()
+
+        def get_response(query, history):
+            """
+            Wrapper function to call the streaming API and compile the response.
+            """
+            response = ''
+
+            global chat_model_selection
+
+            for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
+                if char == '<':
+                    break
+                response += char
+                yield response
 
+        gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏆 LLM Leadeboard", elem_id="llm-benchmark-table", id=0):
@@ -105,10 +91,10 @@ with demo:
                                                   label="Training Infrastructure*",
                                                   elem_id="training_infra",
                                                   value=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"])
-            filter_affiliation = gr.CheckboxGroup(choices=["No Affiliation","Intel Innovator","Intel Student Ambassador", "Intel Liftoff", "Intel Labs", "Other"],
+            filter_affiliation = gr.CheckboxGroup(choices=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"],
                                                   label="Intel Program Affiliation",
                                                   elem_id="program_affiliation",
-                                                  value=["No Affiliation","Intel Innovator","Intel Student Ambassador", "Intel Software Liftoff", "Intel Labs", "Other"])
+                                                  value=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"])
 
             with gr.Column():
                 filter_size = gr.CheckboxGroup(choices=[1,3,5,7,13,35,60,70,100],
@@ -124,7 +110,7 @@ with demo:
                                            elem_id="model_types",
                                            value=["pretrained","fine-tuned","chat-models","merges/moerges"])
 
-            initial_df = pd.read_csv("./status/leaderboard_status_030424.csv")
+            initial_df = pd.read_csv("./status/leaderboard_status_030824.csv")
 
             def update_df(hw_selected, platform_selected, affiliation_selected, size_selected, precision_selected, type_selected):
                 filtered_df = filter_benchmarks_table(df=initial_df, hw_selected=hw_selected, platform_selected=platform_selected,
@@ -133,10 +119,10 @@ with demo:
                 return filtered_df
 
             initial_filtered_df = update_df(["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
-                                            ["Intel Developer Cloud","AWS","Azure","GCP","Local"],
-                                            ["No Affiliation","Intel Innovator","Intel Student Ambassador", "Intel Software Liftoff", "Intel Labs", "Other"],
+                                            ["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"],
+                                            ["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"],
                                             [1,3,5,7,13,35,60,70,100],
-                                            ["fp8","fp16","bf16","int8","4bit"],
+                                            ["fp32","fp16","bf16","int8","fp8", "int4"],
                                             ["pretrained","fine-tuned","chat-models","merges/moerges"])
 
             gradio_df_display = gr.Dataframe(value=initial_filtered_df)
@@ -167,6 +153,8 @@ with demo:
             gr.Markdown(DEPLOY_TEXT, elem_classes="markdown-text")
         with gr.TabItem("👩‍💻 Developer Programs", elem_id="hardward-program", id=3):
             gr.Markdown(PROGRAMS_TEXT, elem_classes="markdown-text")
+        with gr.TabItem("❓ About ", elem_id="about", id=5):
+            gr.Markdown(ABOUT, elem_classes="markdown-text")
         with gr.TabItem("🏎️ Submit", elem_id="submit", id=4):
             gr.Markdown(SUBMIT_TEXT, elem_classes="markdown-text")
             with gr.Row():
@@ -226,7 +214,7 @@ with demo:
                                        Local is the ideal choice for Core Ultra, ARC GPUs, and local data center infrastructure.""",
                                        )
             affiliation = gr.Dropdown(
-                choices=["No Affiliation","Innovator","Student Ambassador","Intel Liftoff", "Intel Labs", "Other"],
+                choices=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"],
                 label="Affiliation with Intel",
                 multiselect=False,
                 value="No Affiliation",
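Two review notes on the wiring above: `os.environ[...]` raises `KeyError` when a secret is missing, so a misconfigured Space fails at startup rather than mid-chat; and environment values are always strings, so `concurrency_limit=inference_concurrency_limit` hands Gradio a string where a number is expected (an explicit `int(...)` would be safer). The backend behind `inference_endpoint_url` is not part of this commit. As a rough illustration of the contract the client expects (a JSON body with `query` and `selected_model`, answered as streamed plain text), here is a minimal sketch assuming FastAPI and a hypothetical `generate_tokens` helper; only the `/query-stream/` path is grounded in the previously commented-out localhost URL:

```python
# Hypothetical backend sketch for the streaming contract used by
# call_api_and_stream_response() above; not part of this commit.
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()

class ChatRequest(BaseModel):
    query: str
    selected_model: str

def generate_tokens(query: str, model_name: str):
    # Placeholder generator; a real server would run the selected model here.
    for token in ("This ", "is ", "a ", "stub."):
        yield token

# The client sends a GET with a JSON body (that is what
# requests.get(url, json=params, stream=True) produces), so the route is
# declared as GET here too, unusual as that is for HTTP.
@app.get("/query-stream/")
def query_stream(params: ChatRequest):
    return StreamingResponse(
        generate_tokens(params.query, params.selected_model),
        media_type="text/plain",
    )
```

On the client side, reading with `iter_content(chunk_size=1)` keeps the UI maximally responsive but costs one read per byte, and `chunk.decode()` can fail on a multi-byte UTF-8 character split across chunks; a larger chunk size would trade a little latency for more robustness.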
info/about.py ADDED
@@ -0,0 +1,52 @@
+def get_public_ip():
+    try:
+        response = requests.get('https://api.ipify.org')
+        public_ip = response.text
+        return public_ip
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+public_ip = get_public_ip()
+
+ABOUT = f"""
+# ❓ About
+
+At Powered-by-Intel LLM Leaderboard we conduct the same benchmarks as the Open LLM Leaderboard and plan to add
+domain-specific benchmarks in the future. We utilize the <a href="https://github.com/EleutherAI/lm-evaluation-harness" target="_blank">
+Eleuther AI Language Model Evaluation Harness </a>, a unified framework to test generative language models on a large number of
+different evaluation tasks.
+
+Our current benchmarks include:
+
+- <a href="https://arxiv.org/abs/1803.05457" target="_blank"> AI2 Reasoning Challenge (25-shot)</a> - a set of grade-school science questions.
+- <a href="https://arxiv.org/abs/1905.07830" target="_blank"> HellaSwag (10-shot)</a> - a test of commonsense inference, which is easy for humans (~95%) but challenging for state-of-the-art models.
+- <a href="https://arxiv.org/abs/2009.03300" target="_blank"> MMLU (5-shot)</a> - a test measuring a text model's multitask accuracy, covering 57 tasks in fields like elementary mathematics, US history, computer science, law, and more.
+- <a href="https://arxiv.org/abs/2109.07958" target="_blank"> TruthfulQA (0-shot)</a> - a test measuring a model's propensity to reproduce falsehoods commonly found online. Note: TruthfulQA is technically a 6-shot task in the Harness because each example is prepended with 6 Q/A pairs, even in the 0-shot setting.
+- <a href="https://arxiv.org/abs/1907.10641" target="_blank"> Winogrande (5-shot)</a> - an adversarial and difficult Winograd benchmark at scale, for commonsense reasoning.
+- <a href="https://arxiv.org/abs/2110.14168" target="_blank"> GSM8k (5-shot)</a> - diverse grade school math word problems measuring a model's ability to solve multi-step mathematical reasoning problems.
+For all these evaluations, a higher score is better. We've chosen these benchmarks as they test a variety of reasoning and general knowledge across a wide variety of fields in 0-shot and few-shot settings. In the future, we plan to add domain-specific benchmarks to further evaluate our models.
+
+We run an adapted version of the benchmark code specifically designed to run the EleutherAI Harness benchmarks on Gaudi processors.
+This adapted evaluation harness is built into the Hugging Face Optimum Habana Library. Review the documentation [here](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation).
+
+## Support and Community
+
+Join 5000+ developers on the [Intel DevHub Discord](https://discord.gg/yNYNxK2k) to get support with your submission
+and talk about everything from GenAI, HPC, to Quantum Computing.
+
+## "Chat with Top Models on the Leaderboard Here 💬" Functionality
+
+This is a fun on-leaderboard LLM chat functionality designed to provide a quick way to test the top LLMs on the leaderboard.
+As the leaderboard matures and users submit models, we will rotate the available models for chat. Who knows!? You might find
+your model featured here soon! ⭐
+
+### Chat Functionality Notice
+- All the models in this demo run on 4th Generation Intel® Xeon® (Sapphire Rapids) utilizing AMX operations and quantized inference optimizations.
+- Terms of use: By using the chat functionality, users are required to agree to the following terms: The service is a research preview intended for non-commercial
+use only. It can produce factually incorrect output, and should not be relied on to produce factually accurate information.
+The service only provides limited safety measures and may generate lewd, biased or otherwise offensive content. It must not be
+used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
+- License: The chat functionality is a research preview intended for non-commercial use only.
+
+space ip: {public_ip}
+"""
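Two bugs lurk in this new file as committed. It never imports `requests`, and because the `try/except` in `get_public_ip` swallows the resulting `NameError`, the About tab would render `space ip: Error: name 'requests' is not defined` instead of crashing. The call also has no timeout and runs at import time (via the module-level `public_ip = get_public_ip()`), so a slow api.ipify.org response delays Space startup. A hedged rewrite of the helper addressing both:

```python
import requests  # missing from info/about.py as committed

def get_public_ip(timeout_s: float = 3.0) -> str:
    """Return this Space's public IP, or an error string on failure."""
    try:
        response = requests.get("https://api.ipify.org", timeout=timeout_s)
        response.raise_for_status()  # surface HTTP error codes as exceptions
        return response.text
    except Exception as e:
        return f"Error: {e}"
```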
info/deployment.py CHANGED
@@ -90,6 +90,34 @@ helps you choose the best option for your specific use case. Happy building!
 
 <hr>
 
+# Intel® Gaudi® Accelerators
+The Intel Gaudi 2 accelerator is Intel's most capable deep learning chip. You can learn about Gaudi 2 [here](https://habana.ai/products/gaudi2/).
+
+Intel Gaudi Software supports PyTorch and DeepSpeed for accelerating LLM training and inference.
+The Intel Gaudi Software graph compiler will optimize the execution of the operations accumulated in the graph
+(e.g. operator fusion, data layout management, parallelization, pipelining and memory management,
+and graph-level optimizations).
+
+Optimum Habana provides convenient functionality for various tasks. Below is a command-line snippet to run inference on Gaudi with meta-llama/Llama-2-7b-hf.
+
+👍[Optimum Habana GitHub](https://github.com/huggingface/optimum-habana)
+
+The "run_generation.py" script below can be found [here on GitHub](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
+
+```bash
+python run_generation.py \
+--model_name_or_path meta-llama/Llama-2-7b-hf \
+--use_hpu_graphs \
+--use_kv_cache \
+--max_new_tokens 100 \
+--do_sample \
+--batch_size 2 \
+--prompt "Hello world" "How are you?"
+
+```
+
+<hr>
+
 # Intel® Max Series GPU
 The Intel® Data Center GPU Max Series is Intel's highest performing, highest density, general-purpose discrete GPU, which packs over 100 billion transistors into one package and contains up to 128 Xe Cores--Intel's foundational GPU compute building block. You can learn more about this GPU [here](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/max-series.html).
 
@@ -237,34 +265,7 @@ pipe("In the spring, beautiful flowers bloom...")
 
 <hr>
 
-# Intel® Gaudi Accelerators
-The Intel Gaudi 2 accelerator is Intel's most capable deep learning chip. You can learn about Gaudi 2 [here](https://habana.ai/products/gaudi2/).
-
-Intel Gaudi Software supports PyTorch and DeepSpeed for accelerating LLM training and inference.
-The Intel Gaudi Software graph compiler will optimize the execution of the operations accumulated in the graph
-(e.g. operator fusion, data layout management, parallelization, pipelining and memory management,
-and graph-level optimizations).
-
-Optimum Habana provides covenient functionality for various tasks. Below is a command line snippet to run inference on Gaudi with meta-llama/Llama-2-7b-hf.
-
-👍[Optimum Habana GitHub](https://github.com/huggingface/optimum-habana)
-
-The "run_generation.py" script below can be found [here on GitHub](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
-
-```bash
-python run_generation.py \
---model_name_or_path meta-llama/Llama-2-7b-hf \
---use_hpu_graphs \
---use_kv_cache \
---max_new_tokens 100 \
---do_sample \
---batch_size 2 \
---prompt "Hello world" "How are you?"
-
-```
-<hr>
-
-# Intel Arc GPUs
+# Intel® Arc GPUs
 You can learn more about Arc GPUs [here](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/arc.html).
 
 Code snippets coming soon!
info/train_a_model.py CHANGED
@@ -8,7 +8,7 @@ Below, you can find documentation on how to access free and paid resources to tr
 ## Intel Developer Cloud - Quick Start
 The Intel Developer Cloud is one of the best places to access free and paid compute instances for model training. Intel offers Jupyter Notebook instances supported by
 224 Core 4th Generation Xeon Bare Metal nodes with 4x GPU Max Series 1100. To access these resources please follow the instructions below:
-1. Visit the [Intel Developer Cloud](https://cloud.intel.com/) and sign up for the "Standard - Free" tier to get started.
+1. Visit the [Intel Developer Cloud](https://bit.ly/inteldevelopercloud) and sign up for the "Standard - Free" tier to get started.
 2. Navigate to the "Training" module under the "Software" section in the left panel.
 3. Under the GenAI Essentials section, select the LLM Fine-Tuning with QLoRA notebook and click "Launch".
 4. Follow the instructions in the notebook to train your model using Intel® Data Center GPU Max 1100.
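For readers who have not used QLoRA before, the recipe such a notebook walks through is, in outline: load the base model with 4-bit quantized weights, then train small low-rank adapter matrices on top while the base weights stay frozen. A generic sketch using Hugging Face `transformers` and `peft` (illustrative only; this is not the IDC notebook's code, and the model name is just an example from the leaderboard):

```python
# Generic QLoRA sketch (not the IDC notebook): 4-bit base model + LoRA adapters.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # keep frozen base weights in 4-bit NF4
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,  # run the matmuls in bf16
)
model = AutoModelForCausalLM.from_pretrained(
    "Intel/neural-chat-7b-v3-1",            # example model, not prescribed by the docs
    quantization_config=bnb_config,
)

lora_config = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],    # adapt only the attention projections
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()          # only the small adapters are trainable
```

Note that the 4-bit path shown here relies on bitsandbytes, which targets CUDA GPUs; the IDC notebook presumably substitutes Intel-optimized libraries on GPU Max hardware, so treat this as the algorithmic shape rather than the exact stack.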
status/leaderboard_status_030424.csv CHANGED
@@ -1,5 +1,5 @@
 Model,Average,Hardware,Model Type,Precision,Size,Infrastructure,ARC,HellaSwag,MMLU,TruthfulQA,Winogrande,GSM8K,Affiliation
-Intel/neural-chat-7b-v3-3,69.83,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,66.89,85.26,63.07,63.01,79.64,61.11,Intel Labs
-Intel/neural-chat-7b-v3-2,68.29,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,67.49,83.92,63.55,59.68,79.65,55.12,Intel Labs
-Intel/neural-chat-7b-v3-1,61.59,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,66.21,83.64,62.37,59.65,78.14,19.56,Intel Labs
-Intel/neural-chat-7b-v3,58.46,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,67.15,83.29,62.26,58.77,78.06,1.21,Intel Labs
+Intel/neural-chat-7b-v3-3,69.83,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,66.89,85.26,63.07,63.01,79.64,61.11,Intel Engineering
+Intel/neural-chat-7b-v3-2,68.29,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,67.49,83.92,63.55,59.68,79.65,55.12,Intel Engineering
+Intel/neural-chat-7b-v3-1,61.59,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,66.21,83.64,62.37,59.65,78.14,19.56,Intel Engineering
+Intel/neural-chat-7b-v3,58.46,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,67.15,83.29,62.26,58.77,78.06,1.21,Intel Engineering
status/leaderboard_status_030824.csv ADDED
@@ -0,0 +1,8 @@
+Model,Average,Hardware,Model Type,Precision,Size,Infrastructure,ARC,HellaSwag,MMLU,TruthfulQA,Winogrande,GSM8K,Affiliation
+Intel/neural-chat-7b-v3-3,69.83,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,66.89,85.26,63.07,63.01,79.64,61.11,Intel Engineering
+Intel/neural-chat-7b-v3-2,68.29,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,67.49,83.92,63.55,59.68,79.65,55.12,Intel Engineering
+Intel/neural-chat-7b-v3-1,61.59,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,66.21,83.64,62.37,59.65,78.14,19.56,Intel Engineering
+Intel/neural-chat-7b-v3,58.46,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,67.15,83.29,62.26,58.77,78.06,1.21,Intel Engineering
+Intel/neural-chat-7b-v3-1,61.59,Gaudi,fine-tuned,int8,7,Intel Developer Cloud,65.7,83.54,62.12,59.48,78.61,20.09,Intel Engineering
+Intel/neural-chat-7b-v3-1,61.54,Gaudi,fine-tuned,bf16,7,Intel Developer Cloud,66.3,83.6,62.44,59.54,77.98,19.41,Intel Engineering
+Intel/neural-chat-7b-v3-1,59.9,Gaudi,fine-tuned,int4,7,Intel Developer Cloud,64.25,82.49,60.79,56.4,77.35,18.12,Intel Engineering