Spaces:
Sleeping
Sleeping
Support the new lighteval output format
Browse files
app.py
CHANGED
@@ -90,8 +90,9 @@ def fetch_available_tasks(results_uri, runs_to_fetch, checkpoint) -> dict[str, d
|
|
90 |
|
91 |
for run in runs_to_fetch:
|
92 |
try:
|
93 |
-
|
94 |
-
|
|
|
95 |
|
96 |
for full_filename in parquet_files:
|
97 |
task_name, date_str = full_filename.replace('.parquet', '').rsplit('_', 1)
|
@@ -102,7 +103,6 @@ def fetch_available_tasks(results_uri, runs_to_fetch, checkpoint) -> dict[str, d
|
|
102 |
except FileNotFoundError:
|
103 |
print(f"Checkpoint not found for run: {run}")
|
104 |
|
105 |
-
print(all_tasks)
|
106 |
|
107 |
available_tasks = {
|
108 |
task: {run: info['filename'] for run, info in runs.items()}
|
@@ -177,10 +177,8 @@ def load_task_data(results_uri, runs_to_fetch, checkpoint, task_name, tasks_file
|
|
177 |
return None, None, None
|
178 |
|
179 |
|
180 |
-
print(runs_to_fetch)
|
181 |
|
182 |
data_folder = DataFolder(f"filecache::{results_uri}", token=token, cache_storage="./results-cache")
|
183 |
-
print(tasks_files)
|
184 |
|
185 |
def fetch_run_file(run_to_fetch):
|
186 |
file_path = f"details/{run_to_fetch}/{checkpoint}/{tasks_files[task_name][run_to_fetch]}"
|
@@ -233,8 +231,12 @@ def load_task_data(results_uri, runs_to_fetch, checkpoint, task_name, tasks_file
|
|
233 |
# For some reason some metrics are stored as strings
|
234 |
metrics = df['metrics']
|
235 |
# Assume all metrics are the same
|
236 |
-
for
|
237 |
-
|
|
|
|
|
|
|
|
|
238 |
return prepared_df.set_index('full_prompt')
|
239 |
|
240 |
def get_gold_label(df, task_type):
|
|
|
90 |
|
91 |
for run in runs_to_fetch:
|
92 |
try:
|
93 |
+
details_folder = f"details/{run}/{checkpoint}"
|
94 |
+
files = data_folder.list_files(details_folder, recursive=True)
|
95 |
+
parquet_files = [f.removeprefix(details_folder + "/") for f in files if f.endswith('.parquet')]
|
96 |
|
97 |
for full_filename in parquet_files:
|
98 |
task_name, date_str = full_filename.replace('.parquet', '').rsplit('_', 1)
|
|
|
103 |
except FileNotFoundError:
|
104 |
print(f"Checkpoint not found for run: {run}")
|
105 |
|
|
|
106 |
|
107 |
available_tasks = {
|
108 |
task: {run: info['filename'] for run, info in runs.items()}
|
|
|
177 |
return None, None, None
|
178 |
|
179 |
|
|
|
180 |
|
181 |
data_folder = DataFolder(f"filecache::{results_uri}", token=token, cache_storage="./results-cache")
|
|
|
182 |
|
183 |
def fetch_run_file(run_to_fetch):
|
184 |
file_path = f"details/{run_to_fetch}/{checkpoint}/{tasks_files[task_name][run_to_fetch]}"
|
|
|
231 |
# For some reason some metrics are stored as strings
|
232 |
metrics = df['metrics']
|
233 |
# Assume all metrics are the same
|
234 |
+
available_metrics = set(metric for row_metrics in metrics for metric in row_metrics)
|
235 |
+
for metric_key in available_metrics:
|
236 |
+
prepared_df[f'metric_{metric_key}_{run_name}'] = [metric.get(metric_key, None) for metric in metrics]
|
237 |
+
|
238 |
+
# Merge rows with the same full_prompt
|
239 |
+
prepared_df = prepared_df.groupby('full_prompt').agg(lambda x: next((item for item in x if item is not None), None)).reset_index()
|
240 |
return prepared_df.set_index('full_prompt')
|
241 |
|
242 |
def get_gold_label(df, task_type):
|