Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
pminervini
committed on
Commit
·
c5558c5
1
Parent(s):
a654acb
update
Browse files- backend-cli.py +77 -14
backend-cli.py
CHANGED
@@ -3,6 +3,7 @@
|
|
3 |
import os
|
4 |
import json
|
5 |
|
|
|
6 |
import random
|
7 |
from datetime import datetime
|
8 |
|
@@ -17,6 +18,10 @@ from src.leaderboard.read_evals import EvalResult
|
|
17 |
from src.envs import QUEUE_REPO, RESULTS_REPO, API
|
18 |
from src.utils import my_snapshot_download
|
19 |
|
|
|
|
|
|
|
|
|
20 |
import time
|
21 |
|
22 |
import logging
|
@@ -124,15 +129,11 @@ def process_finished_requests(thr: int) -> bool:
|
|
124 |
|
125 |
# Get all eval request that are FINISHED, if you want to run other evals, change this parameter
|
126 |
eval_requests: list[EvalRequest] = get_eval_requests(job_status=current_finished_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
|
127 |
-
# Sort the evals by priority (first submitted first run)
|
128 |
eval_requests: list[EvalRequest] = sort_models_by_priority(api=API, models=eval_requests)
|
129 |
|
130 |
-
# XXX
|
131 |
-
# eval_requests = [r for r in eval_requests if 'bloom-560m' in r.model]
|
132 |
-
|
133 |
random.shuffle(eval_requests)
|
134 |
|
135 |
-
from src.leaderboard.read_evals import get_raw_eval_results
|
136 |
eval_results: list[EvalResult] = get_raw_eval_results(EVAL_RESULTS_PATH_BACKEND, EVAL_REQUESTS_PATH_BACKEND, True)
|
137 |
|
138 |
result_name_to_request = {request_to_result_name(r): r for r in eval_requests}
|
@@ -143,9 +144,10 @@ def process_finished_requests(thr: int) -> bool:
|
|
143 |
result_name: str = request_to_result_name(eval_request)
|
144 |
|
145 |
# Check the corresponding result
|
146 |
-
from typing import Optional
|
147 |
eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
|
148 |
|
|
|
|
|
149 |
task_lst = TASKS_HARNESS.copy()
|
150 |
random.shuffle(task_lst)
|
151 |
|
@@ -169,6 +171,58 @@ def process_finished_requests(thr: int) -> bool:
|
|
169 |
return False
|
170 |
|
171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
def process_pending_requests() -> bool:
|
173 |
sanity_checks()
|
174 |
|
@@ -176,7 +230,7 @@ def process_pending_requests() -> bool:
|
|
176 |
|
177 |
# Get all eval request that are PENDING, if you want to run other evals, change this parameter
|
178 |
eval_requests = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
|
179 |
-
# Sort the evals by priority (first submitted first run)
|
180 |
eval_requests = sort_models_by_priority(api=API, models=eval_requests)
|
181 |
|
182 |
random.shuffle(eval_requests)
|
@@ -207,19 +261,28 @@ def process_pending_requests() -> bool:
|
|
207 |
if __name__ == "__main__":
|
208 |
wait = True
|
209 |
|
210 |
-
|
211 |
-
if socket.gethostname() in {'hamburg'} or os.path.isdir("/home/pminervi"):
|
212 |
wait = False
|
213 |
|
214 |
if wait:
|
215 |
time.sleep(60 * random.randint(5, 10))
|
216 |
-
pass
|
217 |
|
218 |
-
|
219 |
-
|
|
|
|
|
|
|
220 |
|
221 |
if res is False:
|
222 |
-
|
|
|
|
|
|
|
|
|
|
|
223 |
|
224 |
if res is False:
|
225 |
-
|
|
|
|
|
|
|
|
3 |
import os
|
4 |
import json
|
5 |
|
6 |
+
import socket
|
7 |
import random
|
8 |
from datetime import datetime
|
9 |
|
|
|
18 |
from src.envs import QUEUE_REPO, RESULTS_REPO, API
|
19 |
from src.utils import my_snapshot_download
|
20 |
|
21 |
+
from src.leaderboard.read_evals import get_raw_eval_results
|
22 |
+
|
23 |
+
from typing import Optional
|
24 |
+
|
25 |
import time
|
26 |
|
27 |
import logging
|
|
|
129 |
|
130 |
# Get all eval request that are FINISHED, if you want to run other evals, change this parameter
|
131 |
eval_requests: list[EvalRequest] = get_eval_requests(job_status=current_finished_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
|
132 |
+
# Sort the evals by priority (first submitted, first run)
|
133 |
eval_requests: list[EvalRequest] = sort_models_by_priority(api=API, models=eval_requests)
|
134 |
|
|
|
|
|
|
|
135 |
random.shuffle(eval_requests)
|
136 |
|
|
|
137 |
eval_results: list[EvalResult] = get_raw_eval_results(EVAL_RESULTS_PATH_BACKEND, EVAL_REQUESTS_PATH_BACKEND, True)
|
138 |
|
139 |
result_name_to_request = {request_to_result_name(r): r for r in eval_requests}
|
|
|
144 |
result_name: str = request_to_result_name(eval_request)
|
145 |
|
146 |
# Check the corresponding result
|
|
|
147 |
eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
|
148 |
|
149 |
+
breakpoint()
|
150 |
+
|
151 |
task_lst = TASKS_HARNESS.copy()
|
152 |
random.shuffle(task_lst)
|
153 |
|
|
|
171 |
return False
|
172 |
|
173 |
|
174 |
+
def maybe_refresh_results(thr: int) -> bool:
    """Run at most one harness task for a queued request with enough likes.

    Requests whose status is PENDING, FINISHED or FAILED are fetched from the
    queue repo, passed through ``sort_models_by_priority`` and then shuffled.
    For the first request (in shuffled order) with ``likes >= thr``, the tasks
    in ``TASKS_HARNESS`` are visited in random order. A task is run when the
    request has no stored result, when the task's benchmark is missing from the
    stored results, or when the benchmark name contains ``'nq'``, ``'trivia'``,
    ``'tqa'`` or ``'self'`` (those benchmarks are always re-run). The request is
    set to RUNNING before the evaluation and to FINISHED after it.

    Args:
        thr: Minimum ``likes`` a request needs in order to be considered.

    Returns:
        True as soon as one evaluation has been run, False when no request/task
        pair was eligible.
    """
    sanity_checks()

    # Unlike a plain "finished" pass, all three of these states are candidates.
    candidate_statuses = [PENDING_STATUS, FINISHED_STATUS, FAILED_STATUS]

    # Get all eval requests in one of the candidate states
    eval_requests: list[EvalRequest] = get_eval_requests(job_status=candidate_statuses, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
    # Sort the evals by priority (first submitted, first run)
    eval_requests = sort_models_by_priority(api=API, models=eval_requests)

    # NOTE(review): this shuffle discards the ordering produced just above;
    # kept as-is to preserve the existing scheduling behaviour.
    random.shuffle(eval_requests)

    eval_results: list[EvalResult] = get_raw_eval_results(EVAL_RESULTS_PATH_BACKEND, EVAL_REQUESTS_PATH_BACKEND, True)

    result_name_to_request = {request_to_result_name(r): r for r in eval_requests}
    result_name_to_result = {r.eval_name: r for r in eval_results}

    for eval_request in eval_requests:
        if eval_request.likes < thr:
            continue

        result_name: str = request_to_result_name(eval_request)

        # Check the corresponding result (None when nothing has been stored yet)
        eval_result: Optional[EvalResult] = result_name_to_result.get(result_name)

        task_lst = TASKS_HARNESS.copy()
        random.shuffle(task_lst)

        # Iterate over tasks and, if we do not have results for a task, run the relevant evaluations
        for task in task_lst:
            task_name = task.benchmark

            always_rerun = 'nq' in task_name or 'trivia' in task_name or 'tqa' in task_name or 'self' in task_name
            if not (eval_result is None or task_name not in eval_result.results or always_rerun):
                continue

            # Use the request registered under this result name; if several
            # requests share a name, the mapping holds the last one.
            eval_request: EvalRequest = result_name_to_request[result_name]

            my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
            my_set_eval_request(api=API, eval_request=eval_request, set_to_status=RUNNING_STATUS, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)

            process_evaluation(task, eval_request)

            my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
            my_set_eval_request(api=API, eval_request=eval_request, set_to_status=FINISHED_STATUS, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)

            # Only one evaluation per invocation.
            return True

    return False
|
224 |
+
|
225 |
+
|
226 |
def process_pending_requests() -> bool:
|
227 |
sanity_checks()
|
228 |
|
|
|
230 |
|
231 |
# Get all eval request that are PENDING, if you want to run other evals, change this parameter
|
232 |
eval_requests = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
|
233 |
+
# Sort the evals by priority (first submitted, first run)
|
234 |
eval_requests = sort_models_by_priority(api=API, models=eval_requests)
|
235 |
|
236 |
random.shuffle(eval_requests)
|
|
|
261 |
if __name__ == "__main__":
    # Script entry point: optionally delay start-up, then try the pending
    # queue and fall back to refresh/finished passes at decreasing likes
    # thresholds until one of them reports that work was done.

    # The listed hosts (or the presence of this home directory) skip the delay.
    skip_delay = socket.gethostname() in {'hamburg', 'neuromancer'} or os.path.isdir("/home/pminervi")
    wait = not skip_delay

    if wait:
        # Random 5-10 minute start-up delay.
        time.sleep(60 * random.randint(5, 10))

    res = False

    # Coin flip: only half of the runs look at the pending queue at all.
    if random.randint(0, 1) == 0:
        res = process_pending_requests()
        time.sleep(60)

    if res is False:
        # Coin flip between refreshing results and processing finished
        # requests, restricted to requests with at least 100 likes.
        refresh_first = random.randint(0, 1) == 0
        res = maybe_refresh_results(100) if refresh_first else process_finished_requests(100)

        time.sleep(60)

    if res is False:
        # Same coin flip, now without a likes threshold.
        refresh_first = random.randint(0, 1) == 0
        res = maybe_refresh_results(0) if refresh_first else process_finished_requests(0)
|