core_leaderboard

Running

App Files Files Community

benediktstroebl committed on Aug 4, 2024

Commit

4415138

1 Parent(s): b0d26e5

added Pareto frontier to plot

Browse files

Files changed (5) hide show

app.py +1 -6
demo.py +0 -103
pareto_utils.py +38 -0
requirements.txt +0 -1
utils.py +46 -6

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ import pandas as pd
 import os
 from utils import parse_json_files, create_scatter_plot
 from huggingface_hub import snapshot_download
-from apscheduler.schedulers.background import BackgroundScheduler
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
@@ -67,8 +66,4 @@ if __name__ == "__main__":
                         etag_timeout=30,
                         max_workers=4,
                     )
-    demo.launch()
-    scheduler = BackgroundScheduler()
-    scheduler.add_job(restart_space, "interval", hours=1) # restarted every 1h as backup in case automatic updates are not working
-    scheduler.start()

 import os
 from utils import parse_json_files, create_scatter_plot
 from huggingface_hub import snapshot_download
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
                         etag_timeout=30,
                         max_workers=4,
                     )
+    demo.launch()

demo.py DELETED Viewed

@@ -1,103 +0,0 @@
-import random
-import numpy as np
-import matplotlib.pyplot as plt
-from scipy import stats
-from dataclasses import dataclass
-GRID_WIDTH = 10
-GRID_HEIGHT = 10
-AGENT_NR = 5  # Number of points
-@dataclass
-class Agent:
-    cost_measurements: list
-    acc_measurements: list
-    @property
-    def cost_mean(self):
-        return np.mean(self.cost_measurements)
-    @property
-    def acc_mean(self):
-        return np.mean(self.acc_measurements)
-    @property
-    def cost_conf_interval(self):
-        if len(self.cost_measurements) > 1:
-            return stats.sem(self.cost_measurements) * stats.t.ppf((1 + 0.95) / 2., len(self.cost_measurements)-1)
-        else:
-            return 0
-    @property
-    def acc_conf_interval(self):
-        if len(self.acc_measurements) > 1:
-            return stats.sem(self.acc_measurements) * stats.t.ppf((1 + 0.95) / 2., len(self.acc_measurements)-1)
-        else:
-            return 0
-    def __repr__(self):
-        return f"Agent(cost={self.cost_mean:.2f}±{self.cost_conf_interval:.2f}, acc={self.acc_mean:.2f}±{self.acc_conf_interval:.2f})"
-def generate_agent() -> Agent:
-    cost_mean = random.randint(2, GRID_WIDTH-2)
-    acc_mean = random.randint(2, GRID_HEIGHT-2)
-    x_samples = [random.gauss(cost_mean, 0.5) for _ in range(10)]  # Gaussian distributed x
-    y_samples = [random.gauss(acc_mean, 0.5) for _ in range(10)]  # Gaussian distributed y
-    return Agent(x_samples, y_samples)
-def draw_grid() -> None:
-    plt.axis([-1, GRID_WIDTH + 1, -1, GRID_HEIGHT + 1])
-    plt.grid(visible=True, which='major', color='0.75', linestyle='--')
-    plt.xticks(range(0, GRID_WIDTH+1, 1))
-    plt.yticks(range(0, GRID_HEIGHT+1, 1))
-    plt.xlim(0, GRID_WIDTH)
-    plt.ylim(0, GRID_HEIGHT)
-def draw_frontier(points: list[Agent], upper_hull: list[Agent]) -> None:
-    draw_grid()
-    for point in points:
-        plt.errorbar(point.cost_mean, point.acc_mean, xerr=point.cost_conf_interval, yerr=point.acc_conf_interval, fmt='o', color='blue', ecolor='gray', capsize=0)
-    plt.plot([p.cost_mean for p in upper_hull], [p.acc_mean for p in upper_hull], linestyle='-', color='black', label='Pareto Frontier')
-    plt.legend(['Pareto Frontier', 'Agents'], loc='upper left')
-    plt.xlabel('Cost')
-    plt.ylabel('Accuracy')
-    plt.show()
-    plt.close()
-def cross(point_o: Agent, point_a: Agent, point_b: Agent) -> int:
-    return (point_a.cost_mean - point_o.cost_mean) * (point_b.acc_mean - point_o.acc_mean) - (point_a.acc_mean - point_o.acc_mean) * (point_b.cost_mean - point_o.cost_mean)
-def compute_hull_side(points: list[Agent]) -> list[Agent]:
-    hull: list[Agent] = []
-    for p in points:
-        while len(hull) >= 2 and cross(hull[-2], hull[-1], p) <= 0:
-            hull.pop()
-        hull.append(p)
-    return hull
-def is_pareto_efficient(others, candidate):
-    for other in others:
-        if (other.cost_mean <= candidate.cost_mean and other.acc_mean >= candidate.acc_mean) and \
-           (other.cost_mean < candidate.cost_mean or other.acc_mean > candidate.acc_mean):
-            return False
-    return True
-def compute_pareto_frontier(points: list[Agent]) -> list[Agent]:
-    points = sorted(list(points), key=lambda p: (p.cost_mean, p.acc_mean))
-    if len(points) <= 1:
-        return points
-    upper_convex_hull = compute_hull_side(list(reversed(points)))
-    pareto_frontier = [agent for agent in upper_convex_hull if is_pareto_efficient(upper_convex_hull, agent)]
-    print("Agents on Pareto frontier:")
-    for agent in pareto_frontier:
-        print(agent)
-    draw_frontier(points, pareto_frontier)
-    return pareto_frontier
-# Main script to generate points and compute the Pareto frontier
-points = [generate_agent() for _ in range(AGENT_NR)]
-pareto_frontier = compute_pareto_frontier(points)

pareto_utils.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import numpy as np
+import matplotlib.pyplot as plt
+from dataclasses import dataclass
@dataclass
class Agent:
    """One evaluated agent, reduced to the two numbers the frontier code reads.

    The positional ``Agent(total_cost, accuracy)`` constructor, equality and
    ``repr`` are all generated by ``@dataclass``.
    """

    # Used as the x-coordinate in the hull / dominance computations.
    total_cost: float
    # Used as the y-coordinate in the hull / dominance computations.
    accuracy: float
def cross(point_o: Agent, point_a: Agent, point_b: Agent) -> float:
    """Return the 2-D cross product of the vectors O->A and O->B.

    Each agent is read as the point ``(total_cost, accuracy)``. The sign of
    the result is the turn direction when walking O -> A -> B: positive for
    a counter-clockwise turn, negative for a clockwise turn, and zero when
    the three points are collinear.

    Args:
        point_o: Origin of both vectors.
        point_a: End point of the first vector.
        point_b: End point of the second vector.

    Returns:
        The signed cross product as a float (the inputs are float fields).
    """
    oa_cost = point_a.total_cost - point_o.total_cost
    oa_accuracy = point_a.accuracy - point_o.accuracy
    ob_cost = point_b.total_cost - point_o.total_cost
    ob_accuracy = point_b.accuracy - point_o.accuracy
    return oa_cost * ob_accuracy - oa_accuracy * ob_cost
def compute_hull_side(points: list[Agent]) -> list[Agent]:
    """Scan *points* in the given order and keep one side of their convex hull.

    A vertex stays on the chain only while the turn formed by the previous
    two vertices and the incoming point has a strictly positive ``cross``
    value; vertices that produce a zero (collinear) or negative turn are
    discarded. The input is consumed in the order supplied, so the caller
    decides which side of the hull is produced by how it orders the points.
    """
    chain: list[Agent] = []
    for candidate in points:
        # Unwind the chain until appending the candidate keeps a positive turn.
        while True:
            if len(chain) < 2:
                break
            if not (cross(chain[-2], chain[-1], candidate) <= 0):
                break
            del chain[-1]
        chain.append(candidate)
    return chain
def is_pareto_efficient(others: list[Agent], candidate: Agent) -> bool:
    """Return True if no agent in *others* dominates *candidate*.

    An agent dominates the candidate when it costs no more and is no less
    accurate, and is strictly better on at least one of the two. An agent
    with identical cost and accuracy (including the candidate itself, if it
    is present in *others*) therefore does not dominate it.

    Args:
        others: Agents to compare against; may be empty.
        candidate: The agent being tested.

    Returns:
        True when the candidate is not dominated, False otherwise.
    """
    return not any(
        other.total_cost <= candidate.total_cost
        and other.accuracy >= candidate.accuracy
        and (
            other.total_cost < candidate.total_cost
            or other.accuracy > candidate.accuracy
        )
        for other in others
    )
def compute_pareto_frontier(points: list[Agent]) -> list[Agent]:
    """Return the agents on the upper convex hull that are not dominated.

    The points are sorted by ``(total_cost, accuracy)``, scanned from the most
    expensive to the cheapest to build the upper hull, and the hull vertices
    are then filtered with ``is_pareto_efficient``.

    Note that the result is limited to hull vertices: an agent that no other
    agent dominates but that lies below the hull is not included.

    Args:
        points: Agents to consider. The input is not modified.

    Returns:
        The frontier agents in descending ``total_cost`` order. With zero or
        one input point, a new list holding the same points is returned.
    """
    # sorted() already returns a fresh list, so no defensive copy is needed.
    ordered = sorted(points, key=lambda p: (p.total_cost, p.accuracy))
    if len(ordered) <= 1:
        return ordered

    # Reverse (rather than sort with reverse=True) so that points with equal
    # keys are visited in the same order as a plain reversal of the sort.
    upper_convex_hull = compute_hull_side(ordered[::-1])
    return [
        agent
        for agent in upper_convex_hull
        if is_pareto_efficient(upper_convex_hull, agent)
    ]

requirements.txt CHANGED Viewed

@@ -170,7 +170,6 @@ httpcore==1.0.5
 httptools==0.6.1
 httpx==0.27.0
 huggingface-hub==0.23.2
-human-eval==1.0
 humanfriendly==10.0
 idna==3.6
 importlib_metadata==7.1.0

 httptools==0.6.1
 httpx==0.27.0
 huggingface-hub==0.23.2
 humanfriendly==10.0
 idna==3.6
 importlib_metadata==7.1.0

utils.py CHANGED Viewed

@@ -2,6 +2,9 @@ import json
 from pathlib import Path
 import pandas as pd
 import plotly.express as px
 def parse_json_files(folder_path):
@@ -43,25 +46,62 @@ def parse_json_files(folder_path):
 def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str = None, hover_data: list = None):
     fig = px.scatter(df,
                      x=x,
                      y=y,
                      hover_data=hover_data,
                      )
     fig.update_layout(
     width = 600,
     height = 500,
     xaxis_title = x_label,
     yaxis_title = y_label,
     xaxis = dict(
-    showline = True,
-    linecolor = 'black',
-    showgrid = False),
     yaxis = dict(
         showline = True,
         showgrid = False,
-        linecolor = 'black'
-    ),
-    plot_bgcolor = 'white'
     )
     return fig

 from pathlib import Path
 import pandas as pd
 import plotly.express as px
+from pareto_utils import Agent, compute_pareto_frontier
+import plotly.graph_objects as go
 def parse_json_files(folder_path):
 def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str = None, hover_data: list = None):
     """Build a Plotly scatter of ``df[y]`` against ``df[x]`` with a frontier overlay.

     The overlay is a dashed black line through the agents returned by
     ``compute_pareto_frontier``, where column ``x`` is treated as the cost
     and column ``y`` as the accuracy.

     Args:
         df: DataFrame holding one row per agent.
         x: Name of the column plotted on the x-axis (cost).
         y: Name of the column plotted on the y-axis (accuracy).
         x_label: Title for the x-axis.
         y_label: Title for the y-axis.
         hover_data: Passed through to ``px.scatter`` as ``hover_data``.

     Returns:
         The Plotly figure containing the scatter trace and the frontier line.
     """
     # Build the frontier from the same two columns that are plotted, so the
     # line always matches the points regardless of which columns are passed.
     agents = [Agent(cost, accuracy) for cost, accuracy in zip(df[x], df[y])]
     pareto_frontier = compute_pareto_frontier(agents)

     fig = px.scatter(
         df,
         x=x,
         y=y,
         hover_data=hover_data,
     )

     # The line trace connects points in list order, so order them by cost.
     pareto_points = sorted(
         ((agent.total_cost, agent.accuracy) for agent in pareto_frontier),
         key=lambda point: point[0],
     )

     # Add the Pareto frontier line
     fig.add_trace(go.Scatter(
         x=[point[0] for point in pareto_points],
         y=[point[1] for point in pareto_points],
         mode='lines',
         name='Pareto Frontier',
         line=dict(color='black', width=2, dash='dash'),
     ))

     # Make both axis ranges extend to zero.
     fig.update_yaxes(rangemode="tozero")
     fig.update_xaxes(rangemode="tozero")

     fig.update_layout(
         width=600,
         height=500,
         xaxis_title=x_label,
         yaxis_title=y_label,
         xaxis=dict(
             showline=True,
             linecolor='black',
             showgrid=False,
         ),
         yaxis=dict(
             showline=True,
             showgrid=False,
             linecolor='black',
         ),
         plot_bgcolor='white',
         # Legend positioning
         legend=dict(
             yanchor="bottom",
             y=0.01,
             xanchor="right",
             x=0.98,
             bgcolor="rgba(255, 255, 255, 0.5)",  # semi-transparent white background
         ),
     )
     return fig