simplify_ux #944
by clefourrier - opened
- app.py +30 -44
- pyproject.toml +6 -6
- requirements.txt +1 -1
app.py
CHANGED
@@ -1,3 +1,4 @@
+import os
 import logging
 import time
 import schedule
@@ -60,18 +61,8 @@ NEW_DATA_ON_LEADERBOARD = True
 LEADERBOARD_DF = None
 
 def restart_space():
-    try:
-        # Check if new data is pending and download if necessary
-        if NEW_DATA_ON_LEADERBOARD:
-            logging.info("Fetching latest leaderboard data before restart.")
-            get_latest_data_leaderboard()
-
-        # Now restart the space
-        API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
-        logging.info("Space restarted successfully.")
-    except Exception as e:
-        logging.error(f"Failed to restart space: {e}")
+    API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
 
 
 def time_diff_wrapper(func):
     def wrapper(*args, **kwargs):
@@ -109,35 +100,29 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, ba
         attempt += 1
     raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
 
-def get_latest_data_leaderboard(leaderboard_initial_df=None):
+def get_latest_data_leaderboard(leaderboard_initial_df = None):
     global NEW_DATA_ON_LEADERBOARD
     global LEADERBOARD_DF
     if NEW_DATA_ON_LEADERBOARD:
-        try:
-            …
-            )
-            logging.info("Leaderboard dataset successfully downloaded.")
-        except Exception as e:
-            logging.error(f"Failed to download leaderboard dataset: {e}")
-            return
-
-        # Reset the flag after successful download
+        print("Leaderboard updated at reload!")
+        leaderboard_dataset = datasets.load_dataset(
+            AGGREGATED_REPO,
+            "default",
+            split="train",
+            cache_dir=HF_HOME,
+            download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS,  # Uses the cached dataset
+            verification_mode="no_checks"
+        )
+        LEADERBOARD_DF = get_leaderboard_df(
+            leaderboard_dataset=leaderboard_dataset,
+            cols=COLS,
+            benchmark_cols=BENCHMARK_COLS,
+        )
         NEW_DATA_ON_LEADERBOARD = False
+
     else:
         LEADERBOARD_DF = leaderboard_initial_df
-
+
     return LEADERBOARD_DF
 
 
@@ -147,9 +132,6 @@ def get_latest_data_queue():
 
 def init_space():
     """Initializes the application space, loading only necessary data."""
-    global NEW_DATA_ON_LEADERBOARD
-    NEW_DATA_ON_LEADERBOARD = True  # Ensure new data is always pulled on restart
-
     if DO_FULL_INIT:
         # These downloads only occur on full initialization
         try:
@@ -467,13 +449,18 @@ def update_leaderboard(payload: WebhookPayload) -> None:
     """Redownloads the leaderboard dataset each time it updates"""
    if payload.repo.type == "dataset" and payload.event.action == "update":
         global NEW_DATA_ON_LEADERBOARD
-
-
-        # Mark the flag for new data
+        if NEW_DATA_ON_LEADERBOARD:
+            return
         NEW_DATA_ON_LEADERBOARD = True
 
-
-
+        datasets.load_dataset(
+            AGGREGATED_REPO,
+            "default",
+            split="train",
+            cache_dir=HF_HOME,
+            download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
+            verification_mode="no_checks"
+        )
 
 # The below code is not used at the moment, as we can manage the queue file locally
 LAST_UPDATE_QUEUE = datetime.datetime.now()
@@ -493,6 +480,5 @@ def update_queue(payload: WebhookPayload) -> None:
 webhooks_server.launch()
 
 scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", hours=1)
-logging.info("Scheduler initialized to restart space every 1 hour.")
+scheduler.add_job(restart_space, "interval", hours=3)  # restarted every 3h as backup in case automatic updates are not working
 scheduler.start()
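Taken together, the app.py changes invert the old flow: instead of restart_space() pulling fresh data before every restart, the update_leaderboard webhook force-redownloads the dataset into the local cache and raises NEW_DATA_ON_LEADERBOARD, so the next page reload can rebuild the dataframe from the cache without hitting the network, and the scheduled restart shrinks to a plain 3-hour backup. Below is a minimal sketch of that pattern, not the Space's actual code: DATASET_REPO, CACHE_DIR, and do_restart are hypothetical placeholders for AGGREGATED_REPO, HF_HOME, and restart_space.

import datasets
from apscheduler.schedulers.background import BackgroundScheduler

DATASET_REPO = "org/aggregated-results"  # placeholder for AGGREGATED_REPO
CACHE_DIR = "./hf_cache"                 # placeholder for HF_HOME
NEW_DATA_ON_LEADERBOARD = True           # True at boot so the first reload populates the table

def on_dataset_update():
    """Webhook side: refresh the cache once and raise the flag."""
    global NEW_DATA_ON_LEADERBOARD
    if NEW_DATA_ON_LEADERBOARD:  # a refresh is already pending, skip duplicate downloads
        return
    NEW_DATA_ON_LEADERBOARD = True
    datasets.load_dataset(
        DATASET_REPO,
        split="train",
        cache_dir=CACHE_DIR,
        download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,  # fetch the new revision now
        verification_mode="no_checks",
    )

def get_latest_data(initial_df=None):
    """Reload side: serve from the cache the webhook already refreshed."""
    global NEW_DATA_ON_LEADERBOARD
    if not NEW_DATA_ON_LEADERBOARD:
        return initial_df  # nothing new, keep the dataframe we already have
    dataset = datasets.load_dataset(
        DATASET_REPO,
        split="train",
        cache_dir=CACHE_DIR,
        download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS,  # cache hit, no network call
        verification_mode="no_checks",
    )
    NEW_DATA_ON_LEADERBOARD = False
    return dataset.to_pandas()  # stand-in for get_leaderboard_df(...)

def do_restart():
    """Placeholder for API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)."""

scheduler = BackgroundScheduler()
scheduler.add_job(do_restart, "interval", hours=3)  # backup in case a webhook is missed
scheduler.start()

The REUSE_DATASET_IF_EXISTS call is what makes the reload cheap: the webhook already wrote the new revision into cache_dir, so the reload only deserializes it.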
pyproject.toml
CHANGED
@@ -15,9 +15,9 @@ dependencies = [
     "pandas>=2.2.2",
     "python-dateutil>=2.9.0",
     "sentencepiece>=0.2.0",
-    "transformers==4.
+    "transformers==4.44.2",
     "tokenizers>=0.19.0",
-    "gradio-space-ci",
+    "gradio-space-ci @ git+https://huggingface.co/spaces/Wauplin/[email protected]",
     "isort>=5.13.2",
     "ruff>=0.6.4",
     "gradio-leaderboard==0.0.11",
@@ -34,16 +34,16 @@ ignore=["I","EM","FBT","TRY003","S101","D101","D102","D103","D104","D105","G004"
 fixable=["ALL"]
 select=["ALL"]
 
-[tool.ruff.lint]
+[tool.ruff.lint]
 select = ["E", "F"]
 fixable = ["ALL"]
 ignore = ["E501"] # line too long (black is taking care of this)
 
-[tool.isort]
+[tool.isort]
 profile = "black"
 
 [tool.black]
 line-length = 119
 
-[tool.
-
+[tool.hatch.metadata]
+allow-direct-references = true
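The new [tool.hatch.metadata] table is what makes the gradio-space-ci pin above legal: hatchling rejects direct references (VCS/URL dependencies such as the git+https:// spec) in PEP 621 dependencies unless allow-direct-references = true is set. A quick post-install sanity check, hypothetical and not part of the PR, that both pins resolved:

from importlib.metadata import PackageNotFoundError, version

# Confirm the pinned builds actually resolved after install.
for pkg in ("transformers", "gradio-space-ci"):
    try:
        print(pkg, version(pkg))  # expect transformers to report 4.44.2
    except PackageNotFoundError:
        print(pkg, "is not installed")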
requirements.txt
CHANGED
@@ -6,7 +6,7 @@ huggingface-hub>=0.24.7
 pandas==2.2.2
 python-dateutil==2.9.0
 sentencepiece==0.2.0
-transformers==4.
+transformers==4.44.2
 tokenizers>=0.19.0
 gradio-space-ci @ git+https://huggingface.co/spaces/Wauplin/[email protected] # CI !!!
 isort==5.13.2