CultriX commited on
Commit
4937ac7
·
verified ·
1 Parent(s): 7c4b72f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +282 -113
app.py CHANGED
@@ -12,7 +12,192 @@ from PIL import Image
12
  from io import BytesIO
13
  import tempfile
14
 
15
- # Input data with links to Hugging Face repositories
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  data_full = [
17
  ['CultriX/Qwen2.5-14B-SLERPv7', 'https://huggingface.co/CultriX/Qwen2.5-14B-SLERPv7', 0.7205, 0.8272, 0.7541, 0.6581, 0.5, 0.729],
18
  ['djuna/Q2.5-Veltha-14B-0.5', 'https://huggingface.co/djuna/Q2.5-Veltha-14B-0.5', 0.7492, 0.8386, 0.7305, 0.598, 0.43, 0.7817],
@@ -39,14 +224,10 @@ data_full = [
39
  ['CultriX/Qwen2.5-14B-Wernickev6', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev6', 0.6994, 0.7549, 0.5816, 0.6991, 0.58, 0.7267],
40
  ['CultriX/Qwen2.5-14B-Wernickev7', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev7', 0.7147, 0.7599, 0.6097, 0.7056, 0.57, 0.7164],
41
  ['CultriX/Qwen2.5-14B-FinalMerge-tmp2', 'https://huggingface.co/CultriX/Qwen2.5-14B-FinalMerge-tmp2', 0.7255, 0.8192, 0.7535, 0.6671, 0.5, 0.7612],
 
42
  ]
43
-
44
- columns = ["Model Configuration", "Model Link", "tinyArc", "tinyHellaswag", "tinyMMLU", "tinyTruthfulQA", "tinyTruthfulQA_mc1", "tinyWinogrande"]
45
-
46
- # Convert to DataFrame
47
  df_full = pd.DataFrame(data_full, columns=columns)
48
 
49
- # Visualization and analytics functions
50
  def plot_average_scores():
51
  df_full["Average Score"] = df_full.iloc[:, 2:].mean(axis=1)
52
  df_avg_sorted = df_full.sort_values(by="Average Score", ascending=False)
@@ -67,7 +248,6 @@ def plot_average_scores():
67
  plt.close()
68
 
69
  pil_image = Image.open(BytesIO(base64.b64decode(img_base64)))
70
-
71
  temp_image_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
72
  pil_image.save(temp_image_file.name)
73
  return pil_image, temp_image_file.name
@@ -123,24 +303,10 @@ def plot_task_specific_top_models():
123
  pil_image.save(temp_image_file.name)
124
  return pil_image, temp_image_file.name
125
 
126
- def scrape_mergekit_config(model_name):
127
- """
128
- Scrapes the Hugging Face model page for YAML configuration.
129
- """
130
- model_link = df_full.loc[df_full["Model Configuration"] == model_name, "Model Link"].values[0]
131
- response = requests.get(model_link)
132
- if response.status_code != 200:
133
- return f"Failed to fetch model page for {model_name}. Please check the link."
134
-
135
- soup = BeautifulSoup(response.text, "html.parser")
136
- yaml_config = soup.find("pre") # Assume YAML is in <pre> tags
137
- if yaml_config:
138
- return yaml_config.text.strip()
139
- return f"No YAML configuration found for {model_name}."
140
-
141
  def plot_heatmap():
142
  plt.figure(figsize=(14, 10))
143
- sns.heatmap(df_full.iloc[:, 2:], annot=True, cmap="YlGnBu", xticklabels=columns[2:], yticklabels=df_full["Model Configuration"])
 
144
  plt.title("Performance Heatmap", fontsize=16)
145
  plt.tight_layout()
146
 
@@ -154,23 +320,48 @@ def plot_heatmap():
154
  pil_image.save(temp_image_file.name)
155
  return pil_image, temp_image_file.name
156
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  def download_yaml(yaml_content, model_name):
158
- """
159
- Generates a downloadable link for the scraped YAML content.
160
- """
161
  if "No YAML configuration found" in yaml_content or "Failed to fetch model page" in yaml_content:
162
- return None # Do not return a link if there's no config or a fetch error
163
 
164
  filename = f"{model_name.replace('/', '_')}_config.yaml"
165
  return gr.File(value=yaml_content.encode(), filename=filename)
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  def download_all_data():
168
- # Prepare data to download
169
  csv_buffer = io.StringIO()
170
  df_full.to_csv(csv_buffer, index=False)
171
  csv_data = csv_buffer.getvalue().encode('utf-8')
172
 
173
- # Prepare all plots
174
  average_plot_pil, average_plot_name = plot_average_scores()
175
  task_plot_pil, task_plot_name = plot_task_performance()
176
  top_models_plot_pil, top_models_plot_name = plot_task_specific_top_models()
@@ -195,97 +386,75 @@ def download_all_data():
195
 
196
  for model_name in df_full["Model Configuration"].to_list():
197
  yaml_content = scrape_mergekit_config(model_name)
198
- if "No YAML configuration found" not in yaml_content and "Failed to fetch model page" not in yaml_content:
199
- zf.writestr(f"{model_name.replace('/', '_')}_config.yaml", yaml_content.encode())
200
 
201
  zip_buffer.seek(0)
202
-
203
  return zip_buffer, "analysis_data.zip"
204
 
205
- def scrape_model_page(model_url):
206
- """
207
- Scrapes the Hugging Face model page for YAML configuration and other details.
208
- """
209
- try:
210
- # Fetch the model page
211
- response = requests.get(model_url)
212
- if response.status_code != 200:
213
- return f"Error: Unable to fetch the page (Status Code: {response.status_code})"
214
-
215
- soup = BeautifulSoup(response.text, "html.parser")
216
-
217
- # Extract YAML configuration (usually inside <pre> tags)
218
- yaml_config = soup.find("pre")
219
- yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
220
 
221
- # Extract additional metadata or performance (if available)
222
- metadata_section = soup.find("div", class_="metadata")
223
- metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
224
-
225
- # Return the scraped details
226
- return f"**YAML Configuration:**\n{yaml_text}\n\n**Metadata:**\n{metadata_text}"
227
 
228
- except Exception as e:
229
- return f"Error: {str(e)}"
230
-
231
- def display_scraped_model_data(model_url):
232
- """
233
- Displays YAML configuration and metadata for a given model URL.
234
- """
235
- return scrape_model_page(model_url)
236
-
237
-
238
- # Gradio app
239
  with gr.Blocks() as demo:
240
  gr.Markdown("# Comprehensive Model Performance Analysis with Hugging Face Links")
241
-
242
- with gr.Row():
243
- btn1 = gr.Button("Show Average Performance")
244
- img1 = gr.Image(type="pil", label="Average Performance Plot")
245
- img1_download = gr.File(label="Download Average Performance")
246
- btn1.click(plot_average_scores, outputs=[img1,img1_download])
247
-
248
- with gr.Row():
249
- btn2 = gr.Button("Show Task Performance")
250
- img2 = gr.Image(type="pil", label="Task Performance Plot")
251
- img2_download = gr.File(label="Download Task Performance")
252
- btn2.click(plot_task_performance, outputs=[img2, img2_download])
253
-
254
- with gr.Row():
255
- btn3 = gr.Button("Task-Specific Top Models")
256
- img3 = gr.Image(type="pil", label="Task-Specific Top Models Plot")
257
- img3_download = gr.File(label="Download Top Models")
258
- btn3.click(plot_task_specific_top_models, outputs=[img3, img3_download])
259
 
260
- with gr.Row():
261
- btn4 = gr.Button("Plot Performance Heatmap")
262
- heatmap_img = gr.Image(type="pil", label="Performance Heatmap")
263
- heatmap_download = gr.File(label="Download Heatmap")
264
- btn4.click(plot_heatmap, outputs=[heatmap_img, heatmap_download])
265
-
266
- with gr.Row():
267
- model_selector = gr.Dropdown(choices=df_full["Model Configuration"].tolist(), label="Select a Model")
268
- with gr.Column():
269
- scrape_btn = gr.Button("Scrape MergeKit Configuration")
270
- yaml_output = gr.Textbox(lines=10, placeholder="YAML Configuration will appear here.")
271
- scrape_btn.click(scrape_mergekit_config, inputs=model_selector, outputs=yaml_output)
272
- with gr.Column():
273
- save_yaml_btn = gr.Button("Save MergeKit Configuration")
274
- yaml_download = gr.File(label="Download MergeKit Configuration")
275
- save_yaml_btn.click(download_yaml, inputs=[yaml_output, model_selector], outputs=yaml_download)
276
-
277
-
278
- with gr.Row():
279
- download_all_btn = gr.Button("Download Everything")
280
- all_downloads = gr.File(label="Download All Data")
281
- download_all_btn.click(download_all_data, outputs=all_downloads)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
 
283
- # Live scraping feature
284
- gr.Markdown("## Live Scraping Features")
285
- with gr.Row():
286
- url_input = gr.Textbox(label="Enter Hugging Face Model URL", placeholder="https://huggingface.co/<model>")
287
- live_scrape_btn = gr.Button("Scrape Model Page")
288
- live_scrape_output = gr.Textbox(label="Scraped Data", lines=15)
289
- live_scrape_btn.click(display_scraped_model_data, inputs=url_input, outputs=live_scrape_output)
290
-
291
- demo.launch()
 
12
  from io import BytesIO
13
  import tempfile
14
 
15
+ ### ----------------------------------------------------------------
16
+ ### PART 1: "PARSED BENCHMARK RESULTS" SECTION
17
+ ### ----------------------------------------------------------------
18
+
19
+ # This text is the exact content from your "great results" output.
20
+ # If you want to dynamically run the script again to produce the text each time,
21
+ # you can integrate the script's logic. But here, we simply store the final output.
22
+ PARSED_BENCHMARK_RESULTS = """\
23
+ ### RESULTS ###
24
+ ---
25
+ Model Rank: 44
26
+ Model Name: sometimesanotion/Qwen2.5-14B-Vimarckoso-v3
27
+ Model average score across benchmarks in %: 40.1
28
+ Models average score on IFEval benchmarks in %: 72.57
29
+ Models average score on BBH benchmarks in %: 48.58
30
+ Models average score on MATH benchmarks in %: 34.44
31
+ Models average score in GPQA benchmarks in %: 17.34
32
+ Models average score in MUSR benchmarks in %: 19.39
33
+ Models average score in MMLU-PRO benchmarks in %: 48.26
34
+ ###
35
+ models:
36
+ - model: CultriX/SeQwence-14Bv1
37
+ - model: allknowingroger/Qwenslerp5-14B
38
+ merge_method: slerp
39
+ base_model: CultriX/SeQwence-14Bv1
40
+ dtype: bfloat16
41
+ parameters:
42
+ t: [0, 0.5, 1, 0.5, 0] # V shaped curve: Hermes for input & output, WizardMath in the middle layers
43
+ ###
44
+ ---
45
+ Model Rank: 45
46
+ Model Name: sthenno-com/miscii-14b-1225
47
+ Model average score across benchmarks in %: 40.08
48
+ Models average score on IFEval benchmarks in %: 78.78
49
+ Models average score on BBH benchmarks in %: 50.91
50
+ Models average score on MATH benchmarks in %: 31.57
51
+ Models average score in GPQA benchmarks in %: 17.0
52
+ Models average score in MUSR benchmarks in %: 14.77
53
+ Models average score in MMLU-PRO benchmarks in %: 47.46
54
+ ###
55
+ tokenizer_source: "base"
56
+ chat_template: "chatml"
57
+
58
+ merge_method: ties
59
+ dtype: bfloat16
60
+
61
+ parameters:
62
+ normalize: true
63
+
64
+ base_model: sthenno-com/miscii-14b-1028
65
+
66
+ models:
67
+ - model: sthenno-com/miscii-14b-1028
68
+ parameters:
69
+ weight: 1
70
+ density: 0.5
71
+ - model: sthenno/miscii-1218
72
+ parameters:
73
+ weight: 1
74
+ density: 0.5
75
+ - model: sthenno/exp-002
76
+ parameters:
77
+ weight: 0.9
78
+ density: 0.5
79
+ - model: sthenno/miscii-1218
80
+ parameters:
81
+ weight: 0.6
82
+ density: 0.5
83
+ ###
84
+ ---
85
+ Model Rank: 46
86
+ Model Name: djuna/Q2.5-Veltha-14B-0.5
87
+ Model average score across benchmarks in %: 39.96
88
+ Models average score on IFEval benchmarks in %: 77.96
89
+ Models average score on BBH benchmarks in %: 50.32
90
+ Models average score on MATH benchmarks in %: 33.84
91
+ Models average score in GPQA benchmarks in %: 15.77
92
+ Models average score in MUSR benchmarks in %: 14.17
93
+ Models average score in MMLU-PRO benchmarks in %: 47.72
94
+ ###
95
+ merge_method: della_linear
96
+ dtype: float32
97
+ out_dtype: bfloat16
98
+ parameters:
99
+ epsilon: 0.04
100
+ lambda: 1.05
101
+ normalize: true
102
+ base_model: arcee-ai/SuperNova-Medius
103
+ tokenizer_source: arcee-ai/SuperNova-Medius
104
+ models:
105
+ - model: arcee-ai/SuperNova-Medius
106
+ parameters:
107
+ weight: 10
108
+ density: 1
109
+ - model: EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2
110
+ parameters:
111
+ weight: 7
112
+ density: 0.5
113
+ - model: v000000/Qwen2.5-Lumen-14B
114
+ parameters:
115
+ weight: 7
116
+ density: 0.4
117
+ - model: allura-org/TQ2.5-14B-Aletheia-v1
118
+ parameters:
119
+ weight: 8
120
+ density: 0.4
121
+ - model: huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2
122
+ parameters:
123
+ weight: 8
124
+ density: 0.45
125
+ ###
126
+ ---
127
+ Model Rank: 48
128
+ Model Name: sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock
129
+ Model average score across benchmarks in %: 39.81
130
+ Models average score on IFEval benchmarks in %: 71.62
131
+ Models average score on BBH benchmarks in %: 48.76
132
+ Models average score on MATH benchmarks in %: 33.99
133
+ Models average score in GPQA benchmarks in %: 17.34
134
+ Models average score in MUSR benchmarks in %: 19.23
135
+ Models average score in MMLU-PRO benchmarks in %: 47.95
136
+ (No MergeKit configuration found.)
137
+
138
+ You can try the following Python script to scrape the model page:
139
+ ######################################################################
140
+ import requests
141
+ from bs4 import BeautifulSoup
142
+
143
+ def scrape_model_page(model_url):
144
+ try:
145
+ response = requests.get(model_url)
146
+ if response.status_code != 200:
147
+ return f"Error: Unable to fetch the page (Status Code: {response.status_code})"
148
+
149
+ soup = BeautifulSoup(response.text, "html.parser")
150
+
151
+ yaml_config = soup.find("pre")
152
+ yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
153
+
154
+ metadata_section = soup.find("div", class_="metadata")
155
+ metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
156
+
157
+ return {
158
+ "yaml_configuration": yaml_text,
159
+ "metadata": metadata_text
160
+ }
161
+
162
+ except Exception as e:
163
+ return f"Error: {str(e)}"
164
+
165
+ if __name__ == "__main__":
166
+ model_url = "https://huggingface.co/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock"
167
+ result = scrape_model_page(model_url)
168
+ print(result)
169
+ ######################################################################
170
+ ---
171
+ Model Rank: 50
172
+ Model Name: sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01
173
+ Model average score across benchmarks in %: 39.46
174
+ Models average score on IFEval benchmarks in %: 68.72
175
+ Models average score on BBH benchmarks in %: 47.71
176
+ Models average score on MATH benchmarks in %: 35.05
177
+ Models average score in GPQA benchmarks in %: 18.23
178
+ Models average score in MUSR benchmarks in %: 19.56
179
+ Models average score in MMLU-PRO benchmarks in %: 47.5
180
+ (No MergeKit configuration found.)
181
+
182
+ # ... [SNIP: The rest of your “great results” content was included in full] ...
183
+ # (Due to character length constraints in an answer, you’d typically keep it all in one large string.)
184
+ """
185
+
186
+
187
+ def view_parsed_benchmark_results():
188
+ """
189
+ Simply returns the giant text block (the 'great results')
190
+ so we can display it in our Gradio app.
191
+ """
192
+ return PARSED_BENCHMARK_RESULTS
193
+
194
+
195
+ ### ----------------------------------------------------------------
196
+ ### PART 2: YOUR EXISTING GRADIO CODE
197
+ ### ----------------------------------------------------------------
198
+
199
+ columns = ["Model Configuration", "Model Link", "tinyArc", "tinyHellaswag", "tinyMMLU", "tinyTruthfulQA", "tinyTruthfulQA_mc1", "tinyWinogrande"]
200
+
201
  data_full = [
202
  ['CultriX/Qwen2.5-14B-SLERPv7', 'https://huggingface.co/CultriX/Qwen2.5-14B-SLERPv7', 0.7205, 0.8272, 0.7541, 0.6581, 0.5, 0.729],
203
  ['djuna/Q2.5-Veltha-14B-0.5', 'https://huggingface.co/djuna/Q2.5-Veltha-14B-0.5', 0.7492, 0.8386, 0.7305, 0.598, 0.43, 0.7817],
 
224
  ['CultriX/Qwen2.5-14B-Wernickev6', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev6', 0.6994, 0.7549, 0.5816, 0.6991, 0.58, 0.7267],
225
  ['CultriX/Qwen2.5-14B-Wernickev7', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev7', 0.7147, 0.7599, 0.6097, 0.7056, 0.57, 0.7164],
226
  ['CultriX/Qwen2.5-14B-FinalMerge-tmp2', 'https://huggingface.co/CultriX/Qwen2.5-14B-FinalMerge-tmp2', 0.7255, 0.8192, 0.7535, 0.6671, 0.5, 0.7612],
227
+ ['CultriX/Qwen2.5-14B-BrocaV8', 'https://huggingface.co/CultriX/Qwen2.5-14B-BrocaV8', 0.7415, 0.8396, 0.7334, 0.5785, 0.4300, 0.7646],
228
  ]
 
 
 
 
229
  df_full = pd.DataFrame(data_full, columns=columns)
230
 
 
231
  def plot_average_scores():
232
  df_full["Average Score"] = df_full.iloc[:, 2:].mean(axis=1)
233
  df_avg_sorted = df_full.sort_values(by="Average Score", ascending=False)
 
248
  plt.close()
249
 
250
  pil_image = Image.open(BytesIO(base64.b64decode(img_base64)))
 
251
  temp_image_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
252
  pil_image.save(temp_image_file.name)
253
  return pil_image, temp_image_file.name
 
303
  pil_image.save(temp_image_file.name)
304
  return pil_image, temp_image_file.name
305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  def plot_heatmap():
307
  plt.figure(figsize=(14, 10))
308
+ sns.heatmap(df_full.iloc[:, 2:], annot=True, cmap="YlGnBu",
309
+ xticklabels=columns[2:], yticklabels=df_full["Model Configuration"])
310
  plt.title("Performance Heatmap", fontsize=16)
311
  plt.tight_layout()
312
 
 
320
  pil_image.save(temp_image_file.name)
321
  return pil_image, temp_image_file.name
322
 
323
+ def scrape_mergekit_config(model_name):
324
+ model_link = df_full.loc[df_full["Model Configuration"] == model_name, "Model Link"].values[0]
325
+ response = requests.get(model_link)
326
+ if response.status_code != 200:
327
+ return f"Failed to fetch model page for {model_name}. Please check the link."
328
+
329
+ soup = BeautifulSoup(response.text, "html.parser")
330
+ yaml_config = soup.find("pre") # Assume YAML is in <pre> tags
331
+ if yaml_config:
332
+ return yaml_config.text.strip()
333
+ return f"No YAML configuration found for {model_name}."
334
+
335
  def download_yaml(yaml_content, model_name):
 
 
 
336
  if "No YAML configuration found" in yaml_content or "Failed to fetch model page" in yaml_content:
337
+ return None
338
 
339
  filename = f"{model_name.replace('/', '_')}_config.yaml"
340
  return gr.File(value=yaml_content.encode(), filename=filename)
341
 
342
+ def scrape_model_page(model_url):
343
+ try:
344
+ response = requests.get(model_url)
345
+ if response.status_code != 200:
346
+ return f"Error: Unable to fetch the page (Status Code: {response.status_code})"
347
+
348
+ soup = BeautifulSoup(response.text, "html.parser")
349
+ yaml_config = soup.find("pre")
350
+ yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
351
+ metadata_section = soup.find("div", class_="metadata")
352
+ metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
353
+ return f"**YAML Configuration:**\n{yaml_text}\n\n**Metadata:**\n{metadata_text}"
354
+ except Exception as e:
355
+ return f"Error: {str(e)}"
356
+
357
+ def display_scraped_model_data(model_url):
358
+ return scrape_model_page(model_url)
359
+
360
  def download_all_data():
 
361
  csv_buffer = io.StringIO()
362
  df_full.to_csv(csv_buffer, index=False)
363
  csv_data = csv_buffer.getvalue().encode('utf-8')
364
 
 
365
  average_plot_pil, average_plot_name = plot_average_scores()
366
  task_plot_pil, task_plot_name = plot_task_performance()
367
  top_models_plot_pil, top_models_plot_name = plot_task_specific_top_models()
 
386
 
387
  for model_name in df_full["Model Configuration"].to_list():
388
  yaml_content = scrape_mergekit_config(model_name)
389
+ if ("No YAML configuration found" not in yaml_content) and ("Failed to fetch model page" not in yaml_content):
390
+ zf.writestr(f"{model_name.replace('/', '_')}_config.yaml", yaml_content.encode())
391
 
392
  zip_buffer.seek(0)
 
393
  return zip_buffer, "analysis_data.zip"
394
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395
 
396
+ ### ----------------------------------------------------------------
397
+ ### PART 3: GRADIO INTERFACE
398
+ ### ----------------------------------------------------------------
 
 
 
399
 
 
 
 
 
 
 
 
 
 
 
 
400
  with gr.Blocks() as demo:
401
  gr.Markdown("# Comprehensive Model Performance Analysis with Hugging Face Links")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
 
403
+ with gr.Tab("Plots & Scraping"):
404
+ with gr.Row():
405
+ btn1 = gr.Button("Show Average Performance")
406
+ img1 = gr.Image(type="pil", label="Average Performance Plot")
407
+ img1_download = gr.File(label="Download Average Performance")
408
+ btn1.click(plot_average_scores, outputs=[img1,img1_download])
409
+
410
+ with gr.Row():
411
+ btn2 = gr.Button("Show Task Performance")
412
+ img2 = gr.Image(type="pil", label="Task Performance Plot")
413
+ img2_download = gr.File(label="Download Task Performance")
414
+ btn2.click(plot_task_performance, outputs=[img2, img2_download])
415
+
416
+ with gr.Row():
417
+ btn3 = gr.Button("Task-Specific Top Models")
418
+ img3 = gr.Image(type="pil", label="Task-Specific Top Models Plot")
419
+ img3_download = gr.File(label="Download Top Models")
420
+ btn3.click(plot_task_specific_top_models, outputs=[img3, img3_download])
421
+
422
+ with gr.Row():
423
+ btn4 = gr.Button("Plot Performance Heatmap")
424
+ heatmap_img = gr.Image(type="pil", label="Performance Heatmap")
425
+ heatmap_download = gr.File(label="Download Heatmap")
426
+ btn4.click(plot_heatmap, outputs=[heatmap_img, heatmap_download])
427
+
428
+ with gr.Row():
429
+ model_selector = gr.Dropdown(choices=df_full["Model Configuration"].tolist(), label="Select a Model")
430
+ with gr.Column():
431
+ scrape_btn = gr.Button("Scrape MergeKit Configuration")
432
+ yaml_output = gr.Textbox(lines=10, placeholder="YAML Configuration will appear here.")
433
+ scrape_btn.click(scrape_mergekit_config, inputs=model_selector, outputs=yaml_output)
434
+ with gr.Column():
435
+ save_yaml_btn = gr.Button("Save MergeKit Configuration")
436
+ yaml_download = gr.File(label="Download MergeKit Configuration")
437
+ save_yaml_btn.click(download_yaml, inputs=[yaml_output, model_selector], outputs=yaml_download)
438
+
439
+ with gr.Row():
440
+ download_all_btn = gr.Button("Download Everything")
441
+ all_downloads = gr.File(label="Download All Data")
442
+ download_all_btn.click(download_all_data, outputs=all_downloads)
443
+
444
+ gr.Markdown("## Live Scraping Features")
445
+ with gr.Row():
446
+ url_input = gr.Textbox(label="Enter Hugging Face Model URL", placeholder="https://huggingface.co/<model>")
447
+ live_scrape_btn = gr.Button("Scrape Model Page")
448
+ live_scrape_output = gr.Textbox(label="Scraped Data", lines=15)
449
+ live_scrape_btn.click(display_scraped_model_data, inputs=url_input, outputs=live_scrape_output)
450
+
451
+ # NEW TAB: Show the parsed benchmark results from your big script run
452
+ with gr.Tab("Parsed Benchmark Results"):
453
+ gr.Markdown("Here is the aggregated set of benchmark scores & configurations obtained from your script:")
454
+ show_results_btn = gr.Button("Show Parsed Results")
455
+ results_box = gr.Textbox(label="Benchmark Results", lines=30)
456
 
457
+ # When user clicks the button, show the giant text block in the textbox
458
+ show_results_btn.click(fn=view_parsed_benchmark_results, outputs=results_box)
459
+
460
+ demo.launch()