Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import pandas as pd
|
2 |
import matplotlib.pyplot as plt
|
|
|
3 |
import gradio as gr
|
4 |
|
5 |
# Input data
|
@@ -36,90 +37,57 @@ columns = ["Model Configuration", "tinyArc", "tinyHellaswag", "tinyMMLU", "tinyT
|
|
36 |
# Convert to DataFrame
|
37 |
df_full = pd.DataFrame(data_full, columns=columns)
|
38 |
|
39 |
-
def
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
plt.figure(figsize=(12, 8))
|
44 |
-
plt.barh(df_avg_sorted["Model Configuration"], df_avg_sorted["Average Score"])
|
45 |
-
plt.title("Average Performance of Models Across Tasks", fontsize=16)
|
46 |
-
plt.xlabel("Average Score", fontsize=14)
|
47 |
-
plt.ylabel("Model Configuration", fontsize=14)
|
48 |
-
plt.gca().invert_yaxis()
|
49 |
-
plt.grid(axis='x', linestyle='--', alpha=0.7)
|
50 |
-
plt.tight_layout()
|
51 |
-
plt.savefig("average_performance.png")
|
52 |
-
return "average_performance.png"
|
53 |
-
|
54 |
-
def plot_task_performance():
|
55 |
-
df_full_melted = df_full.melt(id_vars="Model Configuration", var_name="Task", value_name="Score")
|
56 |
-
|
57 |
-
plt.figure(figsize=(14, 10))
|
58 |
-
for model in df_full["Model Configuration"]:
|
59 |
-
model_data = df_full_melted[df_full_melted["Model Configuration"] == model]
|
60 |
-
plt.plot(model_data["Task"], model_data["Score"], marker="o", label=model)
|
61 |
|
62 |
-
|
|
|
|
|
|
|
|
|
63 |
plt.xlabel("Task", fontsize=14)
|
64 |
plt.ylabel("Score", fontsize=14)
|
65 |
-
plt.xticks(rotation=45)
|
66 |
-
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=9)
|
67 |
plt.grid(axis='y', linestyle='--', alpha=0.7)
|
68 |
plt.tight_layout()
|
69 |
-
plt.savefig("
|
70 |
-
return "
|
71 |
|
72 |
-
def
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
results = pd.DataFrame({"Top Model": top_models, "Score": top_scores}).reset_index().rename(columns={"index": "Task"})
|
77 |
|
78 |
-
|
79 |
-
plt.
|
80 |
-
|
81 |
-
plt.
|
82 |
-
plt.ylabel("Score", fontsize=14)
|
83 |
-
plt.grid(axis="y", linestyle="--", alpha=0.7)
|
84 |
plt.tight_layout()
|
85 |
-
plt.savefig("
|
86 |
-
return "
|
87 |
-
|
88 |
-
def top_3_models_per_task():
|
89 |
-
top_3_data = {
|
90 |
-
task: df_full.nlargest(3, task)[["Model Configuration", task]].values.tolist()
|
91 |
-
for task in df_full.columns[1:-1]
|
92 |
-
}
|
93 |
-
top_3_results = pd.DataFrame({
|
94 |
-
task: {
|
95 |
-
"Top 3 Models": [entry[0] for entry in top_3_data[task]],
|
96 |
-
"Scores": [entry[1] for entry in top_3_data[task]],
|
97 |
-
}
|
98 |
-
for task in top_3_data
|
99 |
-
}).T.rename_axis("Task").reset_index()
|
100 |
-
return top_3_results
|
101 |
|
102 |
with gr.Blocks() as demo:
|
103 |
-
gr.Markdown("# Model Performance Analysis")
|
104 |
|
105 |
with gr.Row():
|
106 |
-
btn1 = gr.Button("Show
|
107 |
-
|
108 |
-
btn1.click(
|
109 |
|
110 |
with gr.Row():
|
111 |
-
btn2 = gr.Button("
|
112 |
-
|
113 |
-
btn2.click(
|
114 |
|
115 |
with gr.Row():
|
116 |
-
btn3 = gr.Button("
|
117 |
-
|
118 |
-
btn3.click(
|
119 |
|
120 |
with gr.Row():
|
121 |
-
btn4 = gr.Button("
|
122 |
-
|
123 |
-
btn4.click(
|
124 |
|
125 |
demo.launch()
|
|
|
1 |
import pandas as pd
|
2 |
import matplotlib.pyplot as plt
|
3 |
+
import seaborn as sns
|
4 |
import gradio as gr
|
5 |
|
6 |
# Input data
|
|
|
37 |
# Convert to DataFrame
|
38 |
df_full = pd.DataFrame(data_full, columns=columns)
|
39 |
|
40 |
+
def summary_statistics():
    """Return per-task descriptive statistics as a flat table.

    Runs ``DataFrame.describe()`` over the task score columns, transposes the
    result so each row is one task, adds a ``"Std Dev"`` column, and resets
    the index so the task name becomes an ordinary column.

    Returns:
        pandas.DataFrame: one row per task column of ``df_full``.
    """
    # Select the task columns by name instead of by position, so that any
    # extra column attached to the shared ``df_full`` at runtime is not
    # reported as if it were a task.
    task_scores = df_full[list(columns[1:])]

    stats = task_scores.describe().T
    # describe() already reports "std"; the extra "Std Dev" column is kept so
    # the shape of the returned table stays the same for existing consumers.
    stats["Std Dev"] = task_scores.std(axis=0)
    return stats.reset_index()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
+
def plot_distribution_boxplots():
    """Draw one box plot of scores per task and save it as a PNG.

    Returns:
        str: path of the written image, ``"distribution_boxplots.png"``.
    """
    fig = plt.figure(figsize=(14, 8))
    try:
        # Name the task columns explicitly so that any extra column attached
        # to the shared ``df_full`` at runtime is not melted in as a "task".
        df_melted = df_full.melt(
            id_vars="Model Configuration",
            value_vars=list(columns[1:]),
            var_name="Task",
            value_name="Score",
        )
        sns.boxplot(x="Task", y="Score", data=df_melted)
        plt.title("Score Distribution by Task", fontsize=16)
        plt.xlabel("Task", fontsize=14)
        plt.ylabel("Score", fontsize=14)
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.savefig("distribution_boxplots.png")
    finally:
        # pyplot keeps every figure alive until it is closed; without this,
        # each button click would leave another figure in memory.
        plt.close(fig)
    return "distribution_boxplots.png"
|
56 |
|
57 |
+
def best_overall_model():
    """Return the row of the model with the highest mean score.

    The mean is taken across every column of ``df_full`` after the first
    (the model-name column).

    Returns:
        pandas.Series: the winning row of ``df_full`` with an
        ``"Average Score"`` entry holding its mean score.

    Raises:
        ValueError: propagated from ``idxmax`` when there is no row to pick.
    """
    # Keep the averages in a local Series. Writing them into the module-level
    # ``df_full`` would add a column that every other handler reading
    # ``df_full`` then picks up as if it were a task.
    average_scores = df_full.iloc[:, 1:].mean(axis=1)
    best_index = average_scores.idxmax()

    # Work on a copy of the row so the shared frame stays untouched.
    best_model = df_full.loc[best_index].copy()
    best_model["Average Score"] = average_scores.loc[best_index]
    return best_model
|
|
|
61 |
|
62 |
+
def plot_heatmap():
    """Render an annotated model-by-task score heatmap and save it as a PNG.

    Returns:
        str: path of the written image, ``"performance_heatmap.png"``.
    """
    # Drive both the plotted data and the x tick labels from the same list of
    # task names, so they stay in step even if the shared ``df_full`` has
    # gained an extra column at runtime.
    task_names = list(columns[1:])

    fig = plt.figure(figsize=(12, 8))
    try:
        sns.heatmap(
            df_full[task_names],
            annot=True,
            cmap="YlGnBu",
            xticklabels=task_names,
            yticklabels=df_full["Model Configuration"],
        )
        plt.title("Performance Heatmap", fontsize=16)
        plt.tight_layout()
        plt.savefig("performance_heatmap.png")
    finally:
        # pyplot keeps every figure alive until it is closed; without this,
        # each button click would leave another figure in memory.
        plt.close(fig)
    return "performance_heatmap.png"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
# Assemble the Gradio UI: four rows, each pairing a trigger button with the
# component that displays the result of the corresponding analysis function.
with gr.Blocks() as demo:
    gr.Markdown("# Enhanced Model Performance Analysis")

    # Row 1: descriptive statistics shown as a table.
    with gr.Row():
        btn1 = gr.Button("Show Summary Statistics")
        stats_output = gr.Dataframe()
        btn1.click(fn=summary_statistics, outputs=stats_output)

    # Row 2: box plots, handed to the image component as a file path.
    with gr.Row():
        btn2 = gr.Button("Plot Score Distributions")
        dist_img = gr.Image(type="filepath")
        btn2.click(fn=plot_distribution_boxplots, outputs=dist_img)

    # Row 3: best model by average score, shown as text.
    with gr.Row():
        btn3 = gr.Button("Best Overall Model")
        best_output = gr.Textbox()
        btn3.click(fn=best_overall_model, outputs=best_output)

    # Row 4: heatmap, handed to the image component as a file path.
    with gr.Row():
        btn4 = gr.Button("Plot Performance Heatmap")
        heatmap_img = gr.Image(type="filepath")
        btn4.click(fn=plot_heatmap, outputs=heatmap_img)

demo.launch()
|