benediktstroebl committed on
Commit
4415138
·
1 Parent(s): b0d26e5

added Pareto frontier to plot

Browse files
Files changed (5) hide show
  1. app.py +1 -6
  2. demo.py +0 -103
  3. pareto_utils.py +38 -0
  4. requirements.txt +0 -1
  5. utils.py +46 -6
app.py CHANGED
@@ -7,7 +7,6 @@ import pandas as pd
7
  import os
8
  from utils import parse_json_files, create_scatter_plot
9
  from huggingface_hub import snapshot_download
10
- from apscheduler.schedulers.background import BackgroundScheduler
11
 
12
  def restart_space():
13
  API.restart_space(repo_id=REPO_ID)
@@ -67,8 +66,4 @@ if __name__ == "__main__":
67
  etag_timeout=30,
68
  max_workers=4,
69
  )
70
- demo.launch()
71
-
72
- scheduler = BackgroundScheduler()
73
- scheduler.add_job(restart_space, "interval", hours=1) # restarted every 1h as backup in case automatic updates are not working
74
- scheduler.start()
 
7
  import os
8
  from utils import parse_json_files, create_scatter_plot
9
  from huggingface_hub import snapshot_download
 
10
 
11
  def restart_space():
12
  API.restart_space(repo_id=REPO_ID)
 
66
  etag_timeout=30,
67
  max_workers=4,
68
  )
69
+ demo.launch()
 
 
 
 
demo.py DELETED
@@ -1,103 +0,0 @@
1
- import random
2
- import numpy as np
3
- import matplotlib.pyplot as plt
4
- from scipy import stats
5
- from dataclasses import dataclass
6
-
7
- GRID_WIDTH = 10
8
- GRID_HEIGHT = 10
9
- AGENT_NR = 5 # Number of points
10
-
11
- @dataclass
12
- class Agent:
13
- cost_measurements: list
14
- acc_measurements: list
15
-
16
- @property
17
- def cost_mean(self):
18
- return np.mean(self.cost_measurements)
19
-
20
- @property
21
- def acc_mean(self):
22
- return np.mean(self.acc_measurements)
23
-
24
- @property
25
- def cost_conf_interval(self):
26
- if len(self.cost_measurements) > 1:
27
- return stats.sem(self.cost_measurements) * stats.t.ppf((1 + 0.95) / 2., len(self.cost_measurements)-1)
28
- else:
29
- return 0
30
-
31
- @property
32
- def acc_conf_interval(self):
33
- if len(self.acc_measurements) > 1:
34
- return stats.sem(self.acc_measurements) * stats.t.ppf((1 + 0.95) / 2., len(self.acc_measurements)-1)
35
- else:
36
- return 0
37
-
38
- def __repr__(self):
39
- return f"Agent(cost={self.cost_mean:.2f}±{self.cost_conf_interval:.2f}, acc={self.acc_mean:.2f}±{self.acc_conf_interval:.2f})"
40
-
41
- def generate_agent() -> Agent:
42
- cost_mean = random.randint(2, GRID_WIDTH-2)
43
- acc_mean = random.randint(2, GRID_HEIGHT-2)
44
- x_samples = [random.gauss(cost_mean, 0.5) for _ in range(10)] # Gaussian distributed x
45
- y_samples = [random.gauss(acc_mean, 0.5) for _ in range(10)] # Gaussian distributed y
46
- return Agent(x_samples, y_samples)
47
-
48
- def draw_grid() -> None:
49
- plt.axis([-1, GRID_WIDTH + 1, -1, GRID_HEIGHT + 1])
50
- plt.grid(visible=True, which='major', color='0.75', linestyle='--')
51
- plt.xticks(range(0, GRID_WIDTH+1, 1))
52
- plt.yticks(range(0, GRID_HEIGHT+1, 1))
53
- plt.xlim(0, GRID_WIDTH)
54
- plt.ylim(0, GRID_HEIGHT)
55
-
56
- def draw_frontier(points: list[Agent], upper_hull: list[Agent]) -> None:
57
- draw_grid()
58
- for point in points:
59
- plt.errorbar(point.cost_mean, point.acc_mean, xerr=point.cost_conf_interval, yerr=point.acc_conf_interval, fmt='o', color='blue', ecolor='gray', capsize=0)
60
- plt.plot([p.cost_mean for p in upper_hull], [p.acc_mean for p in upper_hull], linestyle='-', color='black', label='Pareto Frontier')
61
- plt.legend(['Pareto Frontier', 'Agents'], loc='upper left')
62
- plt.xlabel('Cost')
63
- plt.ylabel('Accuracy')
64
- plt.show()
65
- plt.close()
66
-
67
- def cross(point_o: Agent, point_a: Agent, point_b: Agent) -> int:
68
- return (point_a.cost_mean - point_o.cost_mean) * (point_b.acc_mean - point_o.acc_mean) - (point_a.acc_mean - point_o.acc_mean) * (point_b.cost_mean - point_o.cost_mean)
69
-
70
- def compute_hull_side(points: list[Agent]) -> list[Agent]:
71
- hull: list[Agent] = []
72
- for p in points:
73
- while len(hull) >= 2 and cross(hull[-2], hull[-1], p) <= 0:
74
- hull.pop()
75
- hull.append(p)
76
- return hull
77
-
78
- def is_pareto_efficient(others, candidate):
79
- for other in others:
80
- if (other.cost_mean <= candidate.cost_mean and other.acc_mean >= candidate.acc_mean) and \
81
- (other.cost_mean < candidate.cost_mean or other.acc_mean > candidate.acc_mean):
82
- return False
83
- return True
84
-
85
- def compute_pareto_frontier(points: list[Agent]) -> list[Agent]:
86
- points = sorted(list(points), key=lambda p: (p.cost_mean, p.acc_mean))
87
- if len(points) <= 1:
88
- return points
89
-
90
- upper_convex_hull = compute_hull_side(list(reversed(points)))
91
- pareto_frontier = [agent for agent in upper_convex_hull if is_pareto_efficient(upper_convex_hull, agent)]
92
-
93
- print("Agents on Pareto frontier:")
94
- for agent in pareto_frontier:
95
- print(agent)
96
-
97
- draw_frontier(points, pareto_frontier)
98
-
99
- return pareto_frontier
100
-
101
- # Main script to generate points and compute the Pareto frontier
102
- points = [generate_agent() for _ in range(AGENT_NR)]
103
- pareto_frontier = compute_pareto_frontier(points)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pareto_utils.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ from dataclasses import dataclass
4
+
@dataclass
class Agent:
    """A single point in cost/accuracy space.

    The hull and Pareto helpers in this module read ``total_cost`` as the
    horizontal coordinate and ``accuracy`` as the vertical coordinate.
    """

    # Cost coordinate; the dominance check treats lower values as better.
    total_cost: float
    # Accuracy coordinate; the dominance check treats higher values as better.
    accuracy: float
9
+
10
+
def cross(point_o: Agent, point_a: Agent, point_b: Agent) -> float:
    """Return the z-component of the cross product of vectors OA and OB.

    Coordinates are ``(total_cost, accuracy)``. The result is positive when
    O -> A -> B turns counter-clockwise, negative when it turns clockwise,
    and zero when the three points are collinear.
    """
    ax = point_a.total_cost - point_o.total_cost
    ay = point_a.accuracy - point_o.accuracy
    bx = point_b.total_cost - point_o.total_cost
    by = point_b.accuracy - point_o.accuracy
    return ax * by - ay * bx
13
+
def compute_hull_side(points: list[Agent]) -> list[Agent]:
    """Build one side of a convex hull from pre-ordered points.

    Points are consumed in the order given. Before each point is appended,
    trailing hull points are removed for as long as the last two kept points
    and the new point give ``cross(...) <= 0``; collinear middle points are
    therefore removed as well.

    Args:
        points: Points in the order they should be scanned. Sorting is the
            caller's responsibility.

    Returns:
        The points kept on the hull side, in scan order.
    """
    hull: list[Agent] = []
    for candidate in points:
        # Pop until the last two kept points and the candidate produce a
        # strictly positive cross product.
        while len(hull) >= 2 and cross(hull[-2], hull[-1], candidate) <= 0:
            hull.pop()
        hull.append(candidate)
    return hull
21
+
def is_pareto_efficient(others: list[Agent], candidate: Agent) -> bool:
    """Return True if no point in ``others`` dominates ``candidate``.

    A point dominates the candidate when it is no more expensive and no less
    accurate, and is strictly better on at least one of the two. A point with
    identical coordinates (including the candidate itself) does not dominate.

    Args:
        others: Points to compare against; may contain ``candidate``.
        candidate: The point being tested.

    Returns:
        False as soon as one dominating point is found, True otherwise
        (including when ``others`` is empty).
    """
    for other in others:
        no_worse = (
            other.total_cost <= candidate.total_cost
            and other.accuracy >= candidate.accuracy
        )
        strictly_better = (
            other.total_cost < candidate.total_cost
            or other.accuracy > candidate.accuracy
        )
        if no_worse and strictly_better:
            return False
    return True
28
+
def compute_pareto_frontier(points: list[Agent]) -> list[Agent]:
    """Return the non-dominated points that lie on the upper convex hull.

    The points are sorted by ``(total_cost, accuracy)`` and scanned in
    reverse through ``compute_hull_side``. The resulting hull points are then
    filtered with ``is_pareto_efficient`` against each other. Because the
    filter only sees hull points, a point that is not on the hull is never
    returned, even if no other point dominates it.

    Args:
        points: Points to evaluate; the input is not modified.

    Returns:
        The frontier points in hull scan order, i.e. from highest to lowest
        ``total_cost``. Inputs with zero or one point are returned as a
        sorted list unchanged.
    """
    ordered = sorted(points, key=lambda p: (p.total_cost, p.accuracy))
    if len(ordered) <= 1:
        return ordered

    upper_convex_hull = compute_hull_side(ordered[::-1])
    return [
        agent
        for agent in upper_convex_hull
        if is_pareto_efficient(upper_convex_hull, agent)
    ]
38
+
requirements.txt CHANGED
@@ -170,7 +170,6 @@ httpcore==1.0.5
170
  httptools==0.6.1
171
  httpx==0.27.0
172
  huggingface-hub==0.23.2
173
- human-eval==1.0
174
  humanfriendly==10.0
175
  idna==3.6
176
  importlib_metadata==7.1.0
 
170
  httptools==0.6.1
171
  httpx==0.27.0
172
  huggingface-hub==0.23.2
 
173
  humanfriendly==10.0
174
  idna==3.6
175
  importlib_metadata==7.1.0
utils.py CHANGED
@@ -2,6 +2,9 @@ import json
2
  from pathlib import Path
3
  import pandas as pd
4
  import plotly.express as px
 
 
 
5
 
6
 
7
  def parse_json_files(folder_path):
@@ -43,25 +46,62 @@ def parse_json_files(folder_path):
43
 
44
 
45
  def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str = None, hover_data: list = None):
 
 
 
 
 
 
 
 
 
46
  fig = px.scatter(df,
47
  x=x,
48
  y=y,
49
  hover_data=hover_data,
50
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  fig.update_layout(
52
  width = 600,
53
  height = 500,
54
  xaxis_title = x_label,
55
  yaxis_title = y_label,
56
  xaxis = dict(
57
- showline = True,
58
- linecolor = 'black',
59
- showgrid = False),
60
  yaxis = dict(
61
  showline = True,
62
  showgrid = False,
63
- linecolor = 'black'
64
- ),
65
- plot_bgcolor = 'white'
 
 
 
 
 
 
 
66
  )
67
  return fig
 
2
  from pathlib import Path
3
  import pandas as pd
4
  import plotly.express as px
5
+ from pareto_utils import Agent, compute_pareto_frontier
6
+ import plotly.graph_objects as go
7
+
8
 
9
 
10
  def parse_json_files(folder_path):
 
46
 
47
 
def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str = None, hover_data: list = None):
    """Scatter-plot ``df[y]`` against ``df[x]`` and overlay the Pareto frontier.

    Args:
        df: DataFrame with one row per plotted point.
        x: Column for the horizontal axis; its values are used as the cost
            passed to the Pareto computation.
        y: Column for the vertical axis; its values are used as the accuracy
            passed to the Pareto computation.
        x_label: Title for the x axis.
        y_label: Title for the y axis.
        hover_data: Passed through to ``px.scatter``.

    Returns:
        The Plotly figure containing the scatter points and, when at least
        one frontier point exists, a dashed "Pareto Frontier" line.
    """
    # Build the frontier from the same columns that are plotted so the line
    # always lives in the plot's coordinate system.
    agents = [Agent(cost, accuracy) for cost, accuracy in zip(df[x], df[y])]
    pareto_frontier = compute_pareto_frontier(agents)

    fig = px.scatter(
        df,
        x=x,
        y=y,
        hover_data=hover_data,
    )

    # Sort the Pareto frontier points by x-coordinate so the line is drawn
    # left to right.
    pareto_points = sorted(
        ((agent.total_cost, agent.accuracy) for agent in pareto_frontier),
        key=lambda point: point[0],
    )

    # Add the Pareto frontier line; skipped for an empty frame so no empty
    # legend entry is created.
    if pareto_points:
        fig.add_trace(go.Scatter(
            x=[point[0] for point in pareto_points],
            y=[point[1] for point in pareto_points],
            mode='lines',
            name='Pareto Frontier',
            line=dict(color='black', width=2, dash='dash'),
        ))

    # Anchor both axes at zero.
    fig.update_yaxes(rangemode="tozero")
    fig.update_xaxes(rangemode="tozero")

    fig.update_layout(
        width=600,
        height=500,
        xaxis_title=x_label,
        yaxis_title=y_label,
        xaxis=dict(
            showline=True,
            linecolor='black',
            showgrid=False,
        ),
        yaxis=dict(
            showline=True,
            showgrid=False,
            linecolor='black',
        ),
        plot_bgcolor='white',
        # Legend positioning
        legend=dict(
            yanchor="bottom",
            y=0.01,
            xanchor="right",
            x=0.98,
            bgcolor="rgba(255, 255, 255, 0.5)",  # semi-transparent white background
        ),
    )
    return fig