Spaces:
Running
Running
Upload 2 files
Browse files
- utils/db.py +16 -7
- utils/viz.py +2 -2
utils/db.py
CHANGED
@@ -181,7 +181,7 @@ class TracePreprocessor:
|
|
181 |
# if there is a failure report, return the first one
|
182 |
return pickle.loads(df['failure_report'][0])
|
183 |
|
184 |
-
def _calculate_ci(self, data, confidence=0.95):
|
185 |
data = data[np.isfinite(data)]
|
186 |
|
187 |
if len(data) < 2:
|
@@ -189,9 +189,15 @@ class TracePreprocessor:
|
|
189 |
n = len(data)
|
190 |
|
191 |
mean = np.mean(data)
|
192 |
-
sem = stats.sem(data)
|
193 |
|
194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
return mean, ci[0], ci[1]
|
196 |
|
197 |
def get_parsed_results(self, benchmark_name, aggregate=True):
|
@@ -222,12 +228,15 @@ class TracePreprocessor:
|
|
222 |
agent_df = df[df['agent_name'] == agent_name]
|
223 |
|
224 |
if len(agent_df) > 1:
|
225 |
-
accuracy_mean, accuracy_lower, accuracy_upper = self._calculate_ci(agent_df['accuracy'])
|
226 |
-
cost_mean, cost_lower, cost_upper = self._calculate_ci(agent_df['total_cost'])
|
227 |
|
228 |
# format the confidence interval with +/- sign
|
229 |
-
accuracy_ci = f"± {abs(accuracy_mean - accuracy_lower):.3f}"
|
230 |
-
cost_ci = f"± {abs(cost_mean - cost_lower):.3f}"
|
|
|
|
|
|
|
231 |
|
232 |
df.loc[df['agent_name'] == agent_name, 'acc_ci'] = accuracy_ci
|
233 |
df.loc[df['agent_name'] == agent_name, 'cost_ci'] = cost_ci
|
|
|
181 |
# if there is a failure report, return the first one
|
182 |
return pickle.loads(df['failure_report'][0])
|
183 |
|
184 |
+
def _calculate_ci(self, data, confidence=0.95, type='minmax'):
|
185 |
data = data[np.isfinite(data)]
|
186 |
|
187 |
if len(data) < 2:
|
|
|
189 |
n = len(data)
|
190 |
|
191 |
mean = np.mean(data)
|
|
|
192 |
|
193 |
+
if type == 't':
|
194 |
+
sem = stats.sem(data)
|
195 |
+
ci = stats.t.interval(confidence, n-1, loc=mean, scale=sem)
|
196 |
+
|
197 |
+
elif type == 'minmax':
|
198 |
+
min = np.min(data)
|
199 |
+
max = np.max(data)
|
200 |
+
ci = (min, max)
|
201 |
return mean, ci[0], ci[1]
|
202 |
|
203 |
def get_parsed_results(self, benchmark_name, aggregate=True):
|
|
|
228 |
agent_df = df[df['agent_name'] == agent_name]
|
229 |
|
230 |
if len(agent_df) > 1:
|
231 |
+
accuracy_mean, accuracy_lower, accuracy_upper = self._calculate_ci(agent_df['accuracy'], type='minmax')
|
232 |
+
cost_mean, cost_lower, cost_upper = self._calculate_ci(agent_df['total_cost'], type='minmax')
|
233 |
|
234 |
# format the confidence interval with +/- sign
|
235 |
+
# accuracy_ci = f"± {abs(accuracy_mean - accuracy_lower):.3f}"
|
236 |
+
# cost_ci = f"± {abs(cost_mean - cost_lower):.3f}"
|
237 |
+
|
238 |
+
accuracy_ci = f"-{abs(accuracy_mean - accuracy_lower):.3f}/+{abs(accuracy_mean - accuracy_upper):.3f}"
|
239 |
+
cost_ci = f"-{abs(cost_mean - cost_lower):.3f}/+{abs(cost_mean - cost_upper):.3f}"
|
240 |
|
241 |
df.loc[df['agent_name'] == agent_name, 'acc_ci'] = accuracy_ci
|
242 |
df.loc[df['agent_name'] == agent_name, 'cost_ci'] = cost_ci
|
utils/viz.py
CHANGED
@@ -202,8 +202,8 @@ def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str =
|
|
202 |
|
203 |
if len(agent_data) > 1:
|
204 |
# Calculate 95% confidence intervals
|
205 |
-
ci_x = stats.t.interval(0.95, len(agent_data[x])-1, loc=np.mean(agent_data[x]), scale=stats.sem(agent_data[x]))
|
206 |
-
ci_y = stats.t.interval(0.95, len(agent_data[y])-1, loc=np.mean(agent_data[y]), scale=stats.sem(agent_data[y]))
|
207 |
|
208 |
# # Add error bars for x (cost)
|
209 |
# fig.add_trace(go.Scatter(
|
|
|
202 |
|
203 |
if len(agent_data) > 1:
|
204 |
# Calculate 95% confidence intervals
|
205 |
+
# ci_x = stats.t.interval(0.95, len(agent_data[x])-1, loc=np.mean(agent_data[x]), scale=stats.sem(agent_data[x]))
|
206 |
+
# ci_y = stats.t.interval(0.95, len(agent_data[y])-1, loc=np.mean(agent_data[y]), scale=stats.sem(agent_data[y]))
|
207 |
|
208 |
# # Add error bars for x (cost)
|
209 |
# fig.add_trace(go.Scatter(
|