Spaces:
Running
Running
Commit
·
4415138
1
Parent(s):
b0d26e5
added Pareto frontier to plot
Browse files- app.py +1 -6
- demo.py +0 -103
- pareto_utils.py +38 -0
- requirements.txt +0 -1
- utils.py +46 -6
app.py
CHANGED
@@ -7,7 +7,6 @@ import pandas as pd
|
|
7 |
import os
|
8 |
from utils import parse_json_files, create_scatter_plot
|
9 |
from huggingface_hub import snapshot_download
|
10 |
-
from apscheduler.schedulers.background import BackgroundScheduler
|
11 |
|
12 |
def restart_space():
|
13 |
API.restart_space(repo_id=REPO_ID)
|
@@ -67,8 +66,4 @@ if __name__ == "__main__":
|
|
67 |
etag_timeout=30,
|
68 |
max_workers=4,
|
69 |
)
|
70 |
-
demo.launch()
|
71 |
-
|
72 |
-
scheduler = BackgroundScheduler()
|
73 |
-
scheduler.add_job(restart_space, "interval", hours=1) # restarted every 1h as backup in case automatic updates are not working
|
74 |
-
scheduler.start()
|
|
|
7 |
import os
|
8 |
from utils import parse_json_files, create_scatter_plot
|
9 |
from huggingface_hub import snapshot_download
|
|
|
10 |
|
11 |
def restart_space():
|
12 |
API.restart_space(repo_id=REPO_ID)
|
|
|
66 |
etag_timeout=30,
|
67 |
max_workers=4,
|
68 |
)
|
69 |
+
demo.launch()
|
|
|
|
|
|
|
|
demo.py
DELETED
@@ -1,103 +0,0 @@
|
|
1 |
-
import random
|
2 |
-
import numpy as np
|
3 |
-
import matplotlib.pyplot as plt
|
4 |
-
from scipy import stats
|
5 |
-
from dataclasses import dataclass
|
6 |
-
|
7 |
-
GRID_WIDTH = 10
|
8 |
-
GRID_HEIGHT = 10
|
9 |
-
AGENT_NR = 5 # Number of points
|
10 |
-
|
11 |
-
@dataclass
class Agent:
    """Repeated cost and accuracy measurements collected for one agent.

    Exposes the sample mean of each series and the half-width of a 95%
    confidence interval around that mean.
    """

    cost_measurements: list  # individual cost samples
    acc_measurements: list  # individual accuracy samples

    @staticmethod
    def _confidence_half_width(measurements, confidence=0.95):
        """Return the half-width of the Student-t confidence interval of the mean.

        Fewer than two measurements carry no spread estimate, so 0 is
        returned in that case.
        """
        if len(measurements) > 1:
            # Standard error of the mean scaled by the two-sided t critical
            # value with n - 1 degrees of freedom.
            return stats.sem(measurements) * stats.t.ppf((1 + confidence) / 2., len(measurements) - 1)
        return 0

    @property
    def cost_mean(self):
        """Mean of the cost measurements."""
        return np.mean(self.cost_measurements)

    @property
    def acc_mean(self):
        """Mean of the accuracy measurements."""
        return np.mean(self.acc_measurements)

    @property
    def cost_conf_interval(self):
        """Half-width of the 95% confidence interval of the mean cost."""
        return self._confidence_half_width(self.cost_measurements)

    @property
    def acc_conf_interval(self):
        """Half-width of the 95% confidence interval of the mean accuracy."""
        return self._confidence_half_width(self.acc_measurements)

    def __repr__(self):
        return f"Agent(cost={self.cost_mean:.2f}±{self.cost_conf_interval:.2f}, acc={self.acc_mean:.2f}±{self.acc_conf_interval:.2f})"
|
40 |
-
|
41 |
-
def generate_agent() -> Agent:
    """Create a random agent with ten Gaussian samples per dimension.

    An integer centre is drawn for cost and for accuracy (at least 2 away
    from the lower grid edge and 2 below the grid size), then ten samples
    with standard deviation 0.5 are drawn around each centre.
    """
    sample_count = 10
    spread = 0.5

    # Draw both centres first, then the cost samples, then the accuracy
    # samples, so the sequence of random draws stays the same.
    cost_centre = random.randint(2, GRID_WIDTH - 2)
    acc_centre = random.randint(2, GRID_HEIGHT - 2)

    cost_samples = []
    for _ in range(sample_count):
        cost_samples.append(random.gauss(cost_centre, spread))

    acc_samples = []
    for _ in range(sample_count):
        acc_samples.append(random.gauss(acc_centre, spread))

    return Agent(cost_samples, acc_samples)
|
47 |
-
|
48 |
-
def draw_grid() -> None:
    """Configure the current matplotlib axes as a dashed unit grid.

    Ticks are placed at every integer from 0 to the grid size and the
    final view is clamped to [0, GRID_WIDTH] x [0, GRID_HEIGHT].
    """
    width, height = GRID_WIDTH, GRID_HEIGHT

    # Initial view with a one-unit margin; the xlim/ylim calls below narrow it.
    plt.axis([-1, width + 1, -1, height + 1])
    plt.grid(visible=True, which="major", color="0.75", linestyle="--")

    plt.xticks(range(width + 1))
    plt.yticks(range(height + 1))

    plt.xlim(0, width)
    plt.ylim(0, height)
|
55 |
-
|
56 |
-
def draw_frontier(points: list[Agent], upper_hull: list[Agent]) -> None:
    """Plot all agents with error bars, overlay the frontier line, and show it.

    Args:
        points: Every agent to draw as a blue marker with gray error bars.
        upper_hull: Agents joined, in the given order, by a black line.
    """
    draw_grid()

    # One marker per agent; the error bars are the confidence half-widths.
    marker_style = dict(fmt='o', color='blue', ecolor='gray', capsize=0)
    for agent in points:
        plt.errorbar(
            agent.cost_mean,
            agent.acc_mean,
            xerr=agent.cost_conf_interval,
            yerr=agent.acc_conf_interval,
            **marker_style,
        )

    frontier_costs = [agent.cost_mean for agent in upper_hull]
    frontier_accs = [agent.acc_mean for agent in upper_hull]
    plt.plot(frontier_costs, frontier_accs, linestyle='-', color='black', label='Pareto Frontier')

    plt.legend(['Pareto Frontier', 'Agents'], loc='upper left')
    plt.xlabel('Cost')
    plt.ylabel('Accuracy')

    # Display the figure, then release it.
    plt.show()
    plt.close()
|
66 |
-
|
67 |
-
def cross(point_o: Agent, point_a: Agent, point_b: Agent) -> float:
    """Return the z-component of the cross product OA x OB.

    Points are placed at (cost_mean, acc_mean). The result is positive when
    O -> A -> B turns counter-clockwise, negative when it turns clockwise,
    and zero when the three points are collinear.
    """
    oa_cost = point_a.cost_mean - point_o.cost_mean
    oa_acc = point_a.acc_mean - point_o.acc_mean
    ob_cost = point_b.cost_mean - point_o.cost_mean
    ob_acc = point_b.acc_mean - point_o.acc_mean
    return oa_cost * ob_acc - oa_acc * ob_cost
|
69 |
-
|
70 |
-
def compute_hull_side(points: list[Agent]) -> list[Agent]:
    """Scan ``points`` in order and keep only strictly counter-clockwise turns.

    Whenever the last two kept points and the incoming point do not make a
    strictly positive ``cross`` value, the most recently kept point is
    discarded before the incoming one is added.

    Returns:
        The kept points, in scan order.
    """
    chain: list[Agent] = []
    for candidate in points:
        while len(chain) > 1:
            if not (cross(chain[-2], chain[-1], candidate) <= 0):
                break
            # Collinear or clockwise turn: the middle point is not on this side.
            del chain[-1]
        chain.append(candidate)
    return chain
|
77 |
-
|
78 |
-
def is_pareto_efficient(others, candidate):
    """Return True when no element of ``others`` dominates ``candidate``.

    An element dominates the candidate when its ``cost_mean`` is no higher
    and its ``acc_mean`` is no lower, with at least one of the two strictly
    better. Identical points therefore do not dominate each other.
    """

    def dominates(rival):
        no_worse = rival.cost_mean <= candidate.cost_mean and rival.acc_mean >= candidate.acc_mean
        return no_worse and (rival.cost_mean < candidate.cost_mean or rival.acc_mean > candidate.acc_mean)

    return not any(dominates(rival) for rival in others)
|
84 |
-
|
85 |
-
def compute_pareto_frontier(points: list[Agent]) -> list[Agent]:
    """Compute, print and plot the Pareto frontier of ``points``.

    The agents are sorted by (cost_mean, acc_mean), a hull side is built by
    scanning them from highest to lowest cost, and hull members dominated by
    another hull member are dropped.

    Returns:
        The frontier agents. With zero or one input the sorted input is
        returned directly, without printing or plotting.
    """
    ordered = sorted(points, key=lambda agent: (agent.cost_mean, agent.acc_mean))
    if len(ordered) < 2:
        return ordered

    hull = compute_hull_side(ordered[::-1])

    frontier = []
    for agent in hull:
        if is_pareto_efficient(hull, agent):
            frontier.append(agent)

    print("Agents on Pareto frontier:")
    for agent in frontier:
        print(agent)

    draw_frontier(ordered, frontier)

    return frontier
|
100 |
-
|
101 |
-
# Demo entry point: sample AGENT_NR random agents, then compute their
# Pareto frontier (which also prints and plots it).
points = [generate_agent() for _agent_index in range(AGENT_NR)]
pareto_frontier = compute_pareto_frontier(points)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pareto_utils.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
from dataclasses import dataclass
|
4 |
+
|
5 |
+
@dataclass
class Agent:
    """A single point in the (total cost, accuracy) plane."""

    # Cost coordinate of the point.
    total_cost: float
    # Accuracy coordinate of the point.
    accuracy: float
|
9 |
+
|
10 |
+
|
11 |
+
def cross(point_o: Agent, point_a: Agent, point_b: Agent) -> float:
    """Return the z-component of the cross product OA x OB.

    Points are placed at (total_cost, accuracy). The result is positive when
    O -> A -> B turns counter-clockwise, negative when it turns clockwise,
    and zero when the three points are collinear.
    """
    oa_cost = point_a.total_cost - point_o.total_cost
    oa_acc = point_a.accuracy - point_o.accuracy
    ob_cost = point_b.total_cost - point_o.total_cost
    ob_acc = point_b.accuracy - point_o.accuracy
    return oa_cost * ob_acc - oa_acc * ob_cost
|
13 |
+
|
14 |
+
def compute_hull_side(points: list[Agent]) -> list[Agent]:
    """Scan ``points`` in order and keep only strictly counter-clockwise turns.

    Whenever the last two kept points and the incoming point do not make a
    strictly positive ``cross`` value, the most recently kept point is
    discarded before the incoming one is added.

    Returns:
        The kept points, in scan order.
    """
    chain: list[Agent] = []
    for candidate in points:
        while len(chain) > 1:
            if not (cross(chain[-2], chain[-1], candidate) <= 0):
                break
            # Collinear or clockwise turn: the middle point is not on this side.
            del chain[-1]
        chain.append(candidate)
    return chain
|
21 |
+
|
22 |
+
def is_pareto_efficient(others, candidate):
    """Return True when no element of ``others`` dominates ``candidate``.

    An element dominates the candidate when its ``total_cost`` is no higher
    and its ``accuracy`` is no lower, with at least one of the two strictly
    better. Identical points therefore do not dominate each other.
    """

    def dominates(rival):
        no_worse = rival.total_cost <= candidate.total_cost and rival.accuracy >= candidate.accuracy
        return no_worse and (rival.total_cost < candidate.total_cost or rival.accuracy > candidate.accuracy)

    return not any(dominates(rival) for rival in others)
|
28 |
+
|
29 |
+
def compute_pareto_frontier(points: list[Agent]) -> list[Agent]:
    """Return the agents on the convex Pareto frontier of ``points``.

    The agents are sorted by (total_cost, accuracy), a hull side is built by
    scanning them from highest to lowest cost, and hull members dominated by
    another hull member (no cheaper and no more accurate) are dropped.

    Returns:
        The frontier agents in hull scan order. With zero or one input the
        sorted input is returned directly.
    """
    ordered = sorted(points, key=lambda agent: (agent.total_cost, agent.accuracy))
    if len(ordered) < 2:
        return ordered

    hull = compute_hull_side(ordered[::-1])

    frontier = []
    for agent in hull:
        if is_pareto_efficient(hull, agent):
            frontier.append(agent)
    return frontier
|
38 |
+
|
requirements.txt
CHANGED
@@ -170,7 +170,6 @@ httpcore==1.0.5
|
|
170 |
httptools==0.6.1
|
171 |
httpx==0.27.0
|
172 |
huggingface-hub==0.23.2
|
173 |
-
human-eval==1.0
|
174 |
humanfriendly==10.0
|
175 |
idna==3.6
|
176 |
importlib_metadata==7.1.0
|
|
|
170 |
httptools==0.6.1
|
171 |
httpx==0.27.0
|
172 |
huggingface-hub==0.23.2
|
|
|
173 |
humanfriendly==10.0
|
174 |
idna==3.6
|
175 |
importlib_metadata==7.1.0
|
utils.py
CHANGED
@@ -2,6 +2,9 @@ import json
|
|
2 |
from pathlib import Path
|
3 |
import pandas as pd
|
4 |
import plotly.express as px
|
|
|
|
|
|
|
5 |
|
6 |
|
7 |
def parse_json_files(folder_path):
|
@@ -43,25 +46,62 @@ def parse_json_files(folder_path):
|
|
43 |
|
44 |
|
45 |
def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str = None, hover_data: list = None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
fig = px.scatter(df,
|
47 |
x=x,
|
48 |
y=y,
|
49 |
hover_data=hover_data,
|
50 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
fig.update_layout(
|
52 |
width = 600,
|
53 |
height = 500,
|
54 |
xaxis_title = x_label,
|
55 |
yaxis_title = y_label,
|
56 |
xaxis = dict(
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
yaxis = dict(
|
61 |
showline = True,
|
62 |
showgrid = False,
|
63 |
-
linecolor = 'black'
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
)
|
67 |
return fig
|
|
|
2 |
from pathlib import Path
|
3 |
import pandas as pd
|
4 |
import plotly.express as px
|
5 |
+
from pareto_utils import Agent, compute_pareto_frontier
|
6 |
+
import plotly.graph_objects as go
|
7 |
+
|
8 |
|
9 |
|
10 |
def parse_json_files(folder_path):
|
|
|
46 |
|
47 |
|
48 |
def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str = None, hover_data: list = None):
    """Build a scatter plot of ``df`` with a dashed Pareto-frontier line overlaid.

    Args:
        df: DataFrame holding one row per plotted point.
        x: Column plotted on the x axis. Lower values are treated as better
            when computing the frontier.
        y: Column plotted on the y axis. Higher values are treated as better
            when computing the frontier.
        x_label: Title for the x axis.
        y_label: Title for the y axis.
        hover_data: Passed through to ``plotly.express.scatter``.

    Returns:
        The Plotly figure containing the scatter trace and the frontier line.
    """
    # Build the frontier from the same columns that are plotted so the line
    # always matches the axes. Rows missing either coordinate cannot be
    # ordered, so they are left out of the frontier computation.
    coordinates = df[[x, y]].dropna()
    agents = [
        Agent(cost, accuracy)
        for cost, accuracy in coordinates.itertuples(index=False, name=None)
    ]
    pareto_frontier = compute_pareto_frontier(agents)

    fig = px.scatter(
        df,
        x=x,
        y=y,
        hover_data=hover_data,
    )

    # Order the frontier left to right so the line is drawn without zig-zags.
    pareto_points = sorted(
        ((agent.total_cost, agent.accuracy) for agent in pareto_frontier),
        key=lambda point: point[0],
    )

    # Add the Pareto frontier line.
    fig.add_trace(go.Scatter(
        x=[point[0] for point in pareto_points],
        y=[point[1] for point in pareto_points],
        mode='lines',
        name='Pareto Frontier',
        line=dict(color='black', width=2, dash='dash'),
    ))

    # Anchor both axes at zero.
    fig.update_yaxes(rangemode="tozero")
    fig.update_xaxes(rangemode="tozero")

    fig.update_layout(
        width=600,
        height=500,
        xaxis_title=x_label,
        yaxis_title=y_label,
        xaxis=dict(
            showline=True,
            linecolor='black',
            showgrid=False),
        yaxis=dict(
            showline=True,
            showgrid=False,
            linecolor='black'),
        plot_bgcolor='white',
        # Legend positioning
        legend=dict(
            yanchor="bottom",
            y=0.01,
            xanchor="right",
            x=0.98,
            bgcolor="rgba(255, 255, 255, 0.5)"  # semi-transparent white background
        )
    )
    return fig
|