CultriX committed on
Commit
bdbadad
·
verified ·
1 Parent(s): 4087642

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -67
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import pandas as pd
2
  import matplotlib.pyplot as plt
 
3
  import gradio as gr
4
 
5
  # Input data
@@ -36,90 +37,57 @@ columns = ["Model Configuration", "tinyArc", "tinyHellaswag", "tinyMMLU", "tinyT
36
  # Convert to DataFrame
37
  df_full = pd.DataFrame(data_full, columns=columns)
38
 
39
- def plot_average_scores():
40
- df_full["Average Score"] = df_full.iloc[:, 1:].mean(axis=1)
41
- df_avg_sorted = df_full.sort_values(by="Average Score", ascending=False)
42
-
43
- plt.figure(figsize=(12, 8))
44
- plt.barh(df_avg_sorted["Model Configuration"], df_avg_sorted["Average Score"])
45
- plt.title("Average Performance of Models Across Tasks", fontsize=16)
46
- plt.xlabel("Average Score", fontsize=14)
47
- plt.ylabel("Model Configuration", fontsize=14)
48
- plt.gca().invert_yaxis()
49
- plt.grid(axis='x', linestyle='--', alpha=0.7)
50
- plt.tight_layout()
51
- plt.savefig("average_performance.png")
52
- return "average_performance.png"
53
-
54
- def plot_task_performance():
55
- df_full_melted = df_full.melt(id_vars="Model Configuration", var_name="Task", value_name="Score")
56
-
57
- plt.figure(figsize=(14, 10))
58
- for model in df_full["Model Configuration"]:
59
- model_data = df_full_melted[df_full_melted["Model Configuration"] == model]
60
- plt.plot(model_data["Task"], model_data["Score"], marker="o", label=model)
61
 
62
- plt.title("Performance of All Models Across Tasks", fontsize=16)
 
 
 
 
63
  plt.xlabel("Task", fontsize=14)
64
  plt.ylabel("Score", fontsize=14)
65
- plt.xticks(rotation=45)
66
- plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=9)
67
  plt.grid(axis='y', linestyle='--', alpha=0.7)
68
  plt.tight_layout()
69
- plt.savefig("task_performance.png")
70
- return "task_performance.png"
71
 
72
- def plot_task_specific_top_models():
73
- top_models = df_full.iloc[:, :-1].set_index("Model Configuration").idxmax()
74
- top_scores = df_full.iloc[:, :-1].set_index("Model Configuration").max()
75
-
76
- results = pd.DataFrame({"Top Model": top_models, "Score": top_scores}).reset_index().rename(columns={"index": "Task"})
77
 
78
- plt.figure(figsize=(12, 6))
79
- plt.bar(results["Task"], results["Score"])
80
- plt.title("Task-Specific Top Models", fontsize=16)
81
- plt.xlabel("Task", fontsize=14)
82
- plt.ylabel("Score", fontsize=14)
83
- plt.grid(axis="y", linestyle="--", alpha=0.7)
84
  plt.tight_layout()
85
- plt.savefig("task_specific_top_models.png")
86
- return "task_specific_top_models.png"
87
-
88
- def top_3_models_per_task():
89
- top_3_data = {
90
- task: df_full.nlargest(3, task)[["Model Configuration", task]].values.tolist()
91
- for task in df_full.columns[1:-1]
92
- }
93
- top_3_results = pd.DataFrame({
94
- task: {
95
- "Top 3 Models": [entry[0] for entry in top_3_data[task]],
96
- "Scores": [entry[1] for entry in top_3_data[task]],
97
- }
98
- for task in top_3_data
99
- }).T.rename_axis("Task").reset_index()
100
- return top_3_results
101
 
102
  with gr.Blocks() as demo:
103
- gr.Markdown("# Model Performance Analysis")
104
 
105
  with gr.Row():
106
- btn1 = gr.Button("Show Average Performance")
107
- img1 = gr.Image(type="filepath")
108
- btn1.click(plot_average_scores, inputs=None, outputs=img1)
109
 
110
  with gr.Row():
111
- btn2 = gr.Button("Show Task Performance")
112
- img2 = gr.Image(type="filepath")
113
- btn2.click(plot_task_performance, inputs=None, outputs=img2)
114
 
115
  with gr.Row():
116
- btn3 = gr.Button("Task-Specific Top Models")
117
- img3 = gr.Image(type="filepath")
118
- btn3.click(plot_task_specific_top_models, inputs=None, outputs=img3)
119
 
120
  with gr.Row():
121
- btn4 = gr.Button("Top 3 Models Per Task")
122
- output4 = gr.Dataframe()
123
- btn4.click(top_3_models_per_task, inputs=None, outputs=output4)
124
 
125
  demo.launch()
 
1
  import pandas as pd
2
  import matplotlib.pyplot as plt
3
+ import seaborn as sns
4
  import gradio as gr
5
 
6
  # Input data
 
37
  # Convert to DataFrame
38
  df_full = pd.DataFrame(data_full, columns=columns)
39
 
40
+ def summary_statistics():
41
+ stats = df_full.iloc[:, 1:].describe().T # Summary stats for each task
42
+ stats['Std Dev'] = df_full.iloc[:, 1:].std(axis=0)
43
+ return stats.reset_index()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ def plot_distribution_boxplots():
46
+ plt.figure(figsize=(14, 8))
47
+ df_melted = df_full.melt(id_vars="Model Configuration", var_name="Task", value_name="Score")
48
+ sns.boxplot(x="Task", y="Score", data=df_melted)
49
+ plt.title("Score Distribution by Task", fontsize=16)
50
  plt.xlabel("Task", fontsize=14)
51
  plt.ylabel("Score", fontsize=14)
 
 
52
  plt.grid(axis='y', linestyle='--', alpha=0.7)
53
  plt.tight_layout()
54
+ plt.savefig("distribution_boxplots.png")
55
+ return "distribution_boxplots.png"
56
 
57
+ def best_overall_model():
58
+ df_full["Average Score"] = df_full.iloc[:, 1:].mean(axis=1)
59
+ best_model = df_full.loc[df_full["Average Score"].idxmax()]
60
+ return best_model
 
61
 
62
+ def plot_heatmap():
63
+ plt.figure(figsize=(12, 8))
64
+ sns.heatmap(df_full.iloc[:, 1:], annot=True, cmap="YlGnBu", xticklabels=columns[1:], yticklabels=df_full["Model Configuration"])
65
+ plt.title("Performance Heatmap", fontsize=16)
 
 
66
  plt.tight_layout()
67
+ plt.savefig("performance_heatmap.png")
68
+ return "performance_heatmap.png"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  with gr.Blocks() as demo:
71
+ gr.Markdown("# Enhanced Model Performance Analysis")
72
 
73
  with gr.Row():
74
+ btn1 = gr.Button("Show Summary Statistics")
75
+ stats_output = gr.Dataframe()
76
+ btn1.click(summary_statistics, outputs=stats_output)
77
 
78
  with gr.Row():
79
+ btn2 = gr.Button("Plot Score Distributions")
80
+ dist_img = gr.Image(type="filepath")
81
+ btn2.click(plot_distribution_boxplots, outputs=dist_img)
82
 
83
  with gr.Row():
84
+ btn3 = gr.Button("Best Overall Model")
85
+ best_output = gr.Textbox()
86
+ btn3.click(best_overall_model, outputs=best_output)
87
 
88
  with gr.Row():
89
+ btn4 = gr.Button("Plot Performance Heatmap")
90
+ heatmap_img = gr.Image(type="filepath")
91
+ btn4.click(plot_heatmap, outputs=heatmap_img)
92
 
93
  demo.launch()