"""Chart builders for model performance, cost and per-dataset comparison.

The matplotlib charts take their palette from ``utils.get_chart_colors()``;
the radar chart is built with Plotly and uses its own fixed colours.
"""

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go

from utils import get_chart_colors

# Column holding the score plotted by both matplotlib charts.
_SCORE_COLUMN = "Category Score"

# Columns from this positional index onwards are treated as datasets by
# create_radar_plot (NOTE(review): depends on the caller's column order).
_FIRST_DATASET_COLUMN = 7

# Model types that always appear in the chart legends.
_LEGEND_MODEL_TYPES = ("Private", "Open source")


def setup_matplotlib() -> None:
    """Select the ``Agg`` backend and close every open pyplot figure."""
    matplotlib.use("Agg")
    plt.close("all")


def get_performance_chart(df, category_name: str = "Overall") -> plt.Figure:
    """Build a horizontal bar chart of model scores, lowest score at the bottom.

    Args:
        df: DataFrame with the columns ``"Category Score"``, ``"Model"`` and
            ``"Model Type"``. Every ``"Model Type"`` value must be a key of
            the palette returned by ``get_chart_colors()``.
        category_name: Label appended to the chart title.

    Returns:
        The matplotlib figure. It has already been closed in pyplot (so it
        does not accumulate in pyplot's figure registry); use the returned
        object directly rather than pyplot's "current figure".

    Raises:
        KeyError: If a required column or palette key is missing.
    """
    plt.close("all")
    colors = get_chart_colors()

    df_sorted = df.sort_values(_SCORE_COLUMN, ascending=True)
    scores = df_sorted[_SCORE_COLUMN]
    positions = np.arange(len(df_sorted))
    fig_height = max(8, len(df_sorted) * 0.8)

    # Set the default font size *before* the figure exists so that the very
    # first call is built under the same defaults as every later call.
    plt.rcParams.update({"font.size": 12})
    fig, ax = plt.subplots(figsize=(16, fig_height))
    fig.patch.set_facecolor(colors["background"])
    ax.set_facecolor(colors["background"])

    try:
        ax.barh(
            positions,
            scores,
            height=0.4,
            capstyle="round",
            color=[colors[t] for t in df_sorted["Model Type"]],
        )

        ax.set_title(
            f"Model Performance - {category_name}",
            pad=20,
            fontsize=20,
            fontweight="bold",
            color=colors["text"],
        )
        ax.set_xlabel(
            "Average Score (Tool Selection Quality)",
            fontsize=14,
            labelpad=10,
            color=colors["text"],
        )
        ax.set_xlim(0.0, 1.0)

        ax.set_yticks(positions)
        ax.set_yticklabels(df_sorted["Model"], fontsize=12, color=colors["text"])
        # Leave room on the left for the model-name tick labels.
        plt.subplots_adjust(left=0.35)

        # Print each score just to the right of its bar.
        for i, v in enumerate(scores):
            ax.text(
                v + 0.01,
                i,
                f"{v:.3f}",
                va="center",
                fontsize=12,
                fontweight="bold",
                color=colors["text"],
            )

        ax.grid(True, axis="x", linestyle="--", alpha=0.2, color=colors["grid"])
        ax.spines[["top", "right"]].set_visible(False)
        ax.spines[["bottom", "left"]].set_color(colors["grid"])
        ax.tick_params(colors=colors["text"])

        # Proxy patches: the legend always lists both model types, whether
        # or not both occur in df.
        legend_elements = [
            plt.Rectangle((0, 0), 1, 1, facecolor=colors[label], label=label)
            for label in _LEGEND_MODEL_TYPES
        ]
        ax.legend(
            handles=legend_elements,
            title="Model Type",
            loc="lower right",
            fontsize=12,
            title_fontsize=14,
            facecolor=colors["background"],
            labelcolor=colors["text"],
        )

        plt.tight_layout()
        return fig
    finally:
        # Release the figure from pyplot even when drawing fails.
        plt.close(fig)


def create_radar_plot(df, model_names) -> go.Figure:
    """Build a Plotly radar chart comparing models across dataset columns.

    The dataset axes are every column of ``df`` from positional index 7
    onwards, except ``"IO Cost"``.

    Args:
        df: DataFrame with a ``"Model"`` column plus the dataset columns.
            The radial axis is fixed to ``[0, 1]``.
        model_names: Iterable of model names; one trace is added per name,
            using the first row of ``df`` whose ``"Model"`` equals it.

    Returns:
        The Plotly figure. If ``df`` has no dataset columns, the traces are
        empty.

    Raises:
        IndexError: If a name in ``model_names`` does not occur in
            ``df["Model"]``.
    """
    datasets = [
        col for col in df.columns[_FIRST_DATASET_COLUMN:] if col != "IO Cost"
    ]
    # Repeat the first axis at the end so each polygon is drawn closed.
    # Slicing (rather than indexing) keeps this safe when datasets is empty.
    theta = datasets + datasets[:1]

    fill_colors = ["rgba(99, 102, 241, 0.3)", "rgba(34, 197, 94, 0.3)"]
    line_colors = ["#4F46E5", "#16A34A"]

    fig = go.Figure()
    for idx, model_name in enumerate(model_names):
        matches = df[df["Model"] == model_name]
        if matches.empty:
            # Same exception type pandas' .iloc[0] would raise, but with a
            # message that says which model was missing.
            raise IndexError(
                f"Model {model_name!r} not found in the 'Model' column"
            )
        model_row = matches.iloc[0]

        values = [model_row[name] for name in datasets]
        values += values[:1]

        fig.add_trace(
            go.Scatterpolar(
                r=values,
                theta=theta,
                fill="toself",
                # Cycle through the two colours when more models are given.
                fillcolor=fill_colors[idx % len(fill_colors)],
                line=dict(color=line_colors[idx % len(line_colors)], width=2),
                name=model_name,
                text=[f"{val:.3f}" for val in values],
                textposition="middle right",
                mode="lines+markers+text",
            )
        )

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True, range=[0, 1], showline=False, tickfont=dict(size=12)
            ),
            angularaxis=dict(
                tickfont=dict(size=13, family="Arial"),
                rotation=90,
                direction="clockwise",
            ),
        ),
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=-0.2,
            xanchor="center",
            x=0.5,
            font=dict(size=14),
        ),
        title=dict(
            text="Model Comparison",
            x=0.5,
            y=0.95,
            font=dict(size=24, family="Arial", color="#1F2937"),
        ),
        paper_bgcolor="white",
        plot_bgcolor="white",
        height=700,
        width=900,
        margin=dict(t=100, b=100, l=80, r=80),
    )
    return fig


def get_performance_cost_chart(df, category_name: str = "Overall") -> plt.Figure:
    """Build a scatter chart of score (as a percentage) against I/O cost.

    The x axis is logarithmic and fixed to ``[0.08, 40]``; the y axis is
    fixed to ``[60, 95]``, so points outside those ranges are not visible.

    Args:
        df: DataFrame with the columns ``"Category Score"``, ``"IO Cost"``,
            ``"Model"`` and ``"Model Type"``. Scores are multiplied by 100
            before plotting.
        category_name: Label appended to the chart title.

    Returns:
        The matplotlib figure. As with ``get_performance_chart``, it has
        already been closed in pyplot; use the returned object directly.

    Raises:
        KeyError: If a required column or palette key is missing.
    """
    colors = get_chart_colors()
    fig, ax = plt.subplots(figsize=(12, 8), dpi=300)

    try:
        fig.patch.set_facecolor(colors["background"])
        ax.set_facecolor(colors["background"])
        ax.grid(True, linestyle="--", alpha=0.15, which="both", color=colors["grid"])

        # Background box behind each point label; identical for every row.
        bbox_props = dict(
            boxstyle="round,pad=0.3", fc=colors["background"], ec="none", alpha=0.8
        )

        for _, row in df.iterrows():
            cost = row["IO Cost"]
            score_pct = row[_SCORE_COLUMN] * 100
            color = colors[row["Model Type"]]
            # Slightly larger marker for the highest scorers.
            size = 100 if row[_SCORE_COLUMN] > 0.85 else 80
            edge_color = (
                colors["Private"]
                if row["Model Type"] == "Private"
                else colors["Open source"]
            )

            ax.scatter(
                cost,
                score_pct,
                c=color,
                s=size,
                alpha=0.9,
                edgecolor=edge_color,
                linewidth=1,
                zorder=5,
            )
            ax.annotate(
                f"{row['Model']}\n(${row['IO Cost']:.2f})",
                (cost, score_pct),
                xytext=(5, 5),
                textcoords="offset points",
                fontsize=8,
                color=colors["text"],
                bbox=bbox_props,
                zorder=6,
            )

        ax.set_xscale("log")
        ax.set_xlim(0.08, 40)
        ax.set_ylim(60, 95)

        ax.set_xlabel(
            "I/O Cost per Million Tokens ($)",
            fontsize=10,
            labelpad=10,
            color=colors["text"],
        )
        ax.set_ylabel(
            "Model Performance Score", fontsize=10, labelpad=10, color=colors["text"]
        )

        # Empty scatters serve as legend handles. Drawn on `ax` explicitly
        # instead of relying on pyplot's notion of the current axes.
        legend_elements = [
            ax.scatter([], [], c=colors[label], label=label, s=80)
            for label in _LEGEND_MODEL_TYPES
        ]
        ax.legend(
            handles=legend_elements,
            loc="upper right",
            frameon=True,
            facecolor=colors["background"],
            edgecolor="none",
            fontsize=9,
            labelcolor=colors["text"],
        )

        ax.set_title(
            f"Performance vs. Cost - {category_name}",
            fontsize=14,
            pad=15,
            fontweight="bold",
            color=colors["text"],
        )

        # Shaded horizontal bands for 85-95, 75-85 and 60-75, coloured in
        # that order from colors["performance_bands"].
        for y1, y2, color in zip(
            [85, 75, 60], [95, 85, 75], colors["performance_bands"]
        ):
            ax.axhspan(y1, y2, alpha=0.2, color=color, zorder=1)

        ax.tick_params(axis="both", which="major", labelsize=9, colors=colors["text"])
        ax.tick_params(axis="both", which="minor", labelsize=8, colors=colors["text"])
        ax.xaxis.set_minor_locator(
            plt.LogLocator(base=10.0, subs=np.arange(2, 10) * 0.1)
        )

        for spine in ax.spines.values():
            spine.set_color(colors["grid"])

        plt.tight_layout()
        return fig
    finally:
        # Mirror get_performance_chart: never leave the figure registered in
        # pyplot, on success or on failure.
        plt.close(fig)