simplify_ux

#944
by clefourrier (HF staff) · opened
Files changed (3)
  1. app.py +30 -44
  2. pyproject.toml +6 -6
  3. requirements.txt +1 -1
app.py CHANGED
@@ -1,3 +1,4 @@
+import os
 import logging
 import time
 import schedule
@@ -60,18 +61,8 @@ NEW_DATA_ON_LEADERBOARD = True
 LEADERBOARD_DF = None
 
 def restart_space():
-    logging.info(f"Restarting space with repo ID: {REPO_ID}")
-    try:
-        # Check if new data is pending and download if necessary
-        if NEW_DATA_ON_LEADERBOARD:
-            logging.info("Fetching latest leaderboard data before restart.")
-            get_latest_data_leaderboard()
-
-        # Now restart the space
-        API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
-        logging.info("Space restarted successfully.")
-    except Exception as e:
-        logging.error(f"Failed to restart space: {e}")
+    API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
+
 
 def time_diff_wrapper(func):
     def wrapper(*args, **kwargs):
@@ -109,35 +100,29 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, ba
             attempt += 1
     raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
 
-def get_latest_data_leaderboard(leaderboard_initial_df=None):
+def get_latest_data_leaderboard(leaderboard_initial_df = None):
     global NEW_DATA_ON_LEADERBOARD
     global LEADERBOARD_DF
     if NEW_DATA_ON_LEADERBOARD:
-        logging.info("Leaderboard updated at reload!")
-        try:
-            leaderboard_dataset = datasets.load_dataset(
-                AGGREGATED_REPO,
-                "default",
-                split="train",
-                cache_dir=HF_HOME,
-                download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,  # Always download fresh data
-                verification_mode="no_checks"
-            )
-            LEADERBOARD_DF = get_leaderboard_df(
-                leaderboard_dataset=leaderboard_dataset,
-                cols=COLS,
-                benchmark_cols=BENCHMARK_COLS,
-            )
-            logging.info("Leaderboard dataset successfully downloaded.")
-        except Exception as e:
-            logging.error(f"Failed to download leaderboard dataset: {e}")
-            return
-
-        # Reset the flag after successful download
+        print("Leaderboard updated at reload!")
+        leaderboard_dataset = datasets.load_dataset(
+            AGGREGATED_REPO,
+            "default",
+            split="train",
+            cache_dir=HF_HOME,
+            download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS,  # Uses the cached dataset
+            verification_mode="no_checks"
+        )
+        LEADERBOARD_DF = get_leaderboard_df(
+            leaderboard_dataset=leaderboard_dataset,
+            cols=COLS,
+            benchmark_cols=BENCHMARK_COLS,
+        )
         NEW_DATA_ON_LEADERBOARD = False
+
     else:
         LEADERBOARD_DF = leaderboard_initial_df
+
     return LEADERBOARD_DF
 
 
@@ -147,9 +132,6 @@ def get_latest_data_queue():
 
 def init_space():
     """Initializes the application space, loading only necessary data."""
-    global NEW_DATA_ON_LEADERBOARD
-    NEW_DATA_ON_LEADERBOARD = True  # Ensure new data is always pulled on restart
-
     if DO_FULL_INIT:
         # These downloads only occur on full initialization
         try:
@@ -467,13 +449,18 @@ def update_leaderboard(payload: WebhookPayload) -> None:
     """Redownloads the leaderboard dataset each time it updates"""
     if payload.repo.type == "dataset" and payload.event.action == "update":
         global NEW_DATA_ON_LEADERBOARD
-        logging.info("New data detected, downloading updated leaderboard dataset.")
-
-        # Mark the flag for new data
+        if NEW_DATA_ON_LEADERBOARD:
+            return
         NEW_DATA_ON_LEADERBOARD = True
 
-        # Now actually download the latest data immediately
-        get_latest_data_leaderboard()
+        datasets.load_dataset(
+            AGGREGATED_REPO,
+            "default",
+            split="train",
+            cache_dir=HF_HOME,
+            download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
+            verification_mode="no_checks"
+        )
 
 # The below code is not used at the moment, as we can manage the queue file locally
 LAST_UPDATE_QUEUE = datetime.datetime.now()
@@ -493,6 +480,5 @@ def update_queue(payload: WebhookPayload) -> None:
 webhooks_server.launch()
 
 scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", hours=1)  # Restart every 1h
-logging.info("Scheduler initialized to restart space every 1 hour.")
+scheduler.add_job(restart_space, "interval", hours=3)  # restarted every 3h as backup in case automatic updates are not working
 scheduler.start()
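
Taken together, the app.py changes replace eager webhook-driven rebuilds with a flag-and-cache scheme: the webhook force-redownloads the dataset into the local cache and sets NEW_DATA_ON_LEADERBOARD, the next Gradio reload rebuilds the DataFrame from that cache via REUSE_DATASET_IF_EXISTS, and the scheduled restart shrinks to a bare API.restart_space call that serves only as a 3-hour fallback for missed webhooks. A minimal, stdlib-only reviewer sketch of that flow (not part of the diff; fetch_dataset is a hypothetical stand-in for datasets.load_dataset, and the lock is illustrative, since app.py itself uses bare module globals):

import threading

NEW_DATA_ON_LEADERBOARD = True  # True at startup so the first reload builds the df
LEADERBOARD_DF = None
_lock = threading.Lock()

def fetch_dataset(force_redownload):
    """Hypothetical stand-in for datasets.load_dataset(AGGREGATED_REPO, ...)."""
    return [{"model": "m1", "score": 0.5}]

def update_leaderboard_webhook():
    """Webhook side: dedupe, set the flag, eagerly warm the local cache."""
    global NEW_DATA_ON_LEADERBOARD
    with _lock:
        if NEW_DATA_ON_LEADERBOARD:  # a refresh is already pending
            return
        NEW_DATA_ON_LEADERBOARD = True
    fetch_dataset(force_redownload=True)  # FORCE_REDOWNLOAD in the diff

def get_latest_data_leaderboard(leaderboard_initial_df=None):
    """Reload side: rebuild from the warm cache only when the flag is set."""
    global NEW_DATA_ON_LEADERBOARD, LEADERBOARD_DF
    with _lock:
        if NEW_DATA_ON_LEADERBOARD:
            LEADERBOARD_DF = fetch_dataset(force_redownload=False)  # REUSE_DATASET_IF_EXISTS
            NEW_DATA_ON_LEADERBOARD = False
        else:
            LEADERBOARD_DF = leaderboard_initial_df
    return LEADERBOARD_DF

print(get_latest_data_leaderboard())          # first reload: builds the df
print(get_latest_data_leaderboard("cached"))  # nothing new: initial df passed through
update_leaderboard_webhook()                  # webhook fires: flag set, cache warmed
print(get_latest_data_leaderboard())          # next reload: rebuilds from cache

This also explains the init_space deletion: the flag no longer needs to be forced to True on every restart, because it already starts as True and webhooks re-arm it.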
pyproject.toml CHANGED
@@ -15,9 +15,9 @@ dependencies = [
     "pandas>=2.2.2",
     "python-dateutil>=2.9.0",
     "sentencepiece>=0.2.0",
-    "transformers==4.45.1",
+    "transformers==4.44.2",
     "tokenizers>=0.19.0",
-    "gradio-space-ci",
+    "gradio-space-ci @ git+https://huggingface.co/spaces/Wauplin/[email protected]",
     "isort>=5.13.2",
     "ruff>=0.6.4",
     "gradio-leaderboard==0.0.11",
@@ -34,16 +34,16 @@ ignore=["I","EM","FBT","TRY003","S101","D101","D102","D103","D104","D105","G004"
 fixable=["ALL"]
 select=["ALL"]
 
-[tool.ruff.lint]
+[tool.ruff.lint]
 select = ["E", "F"]
 fixable = ["ALL"]
 ignore = ["E501"] # line too long (black is taking care of this)
 
-[tool.isort]
+[tool.isort]
 profile = "black"
 
 [tool.black]
 line-length = 119
 
-[tool.uv.sources]
-gradio-space-ci = { git = "https://huggingface.co/spaces/Wauplin/gradio-space-ci" }
+[tool.hatch.metadata]
+allow-direct-references = true
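
Context for the [tool.hatch.metadata] addition: moving gradio-space-ci from a [tool.uv.sources] entry to a PEP 508 direct reference puts a URL into the project's dependency metadata, and the hatchling build backend rejects such metadata unless direct references are explicitly allowed. A minimal sketch of the pattern (the project name and version are placeholders, assuming hatchling is the build backend):

# Hypothetical minimal pyproject.toml showing the direct-reference pattern.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "demo-space"  # placeholder
version = "0.1.0"
dependencies = [
    # PEP 508 direct reference, pinned to a tag; replaces [tool.uv.sources]
    "gradio-space-ci @ git+https://huggingface.co/spaces/Wauplin/[email protected]",
]

[tool.hatch.metadata]
allow-direct-references = true  # hatchling otherwise errors on the URL above

Pinning to the 0.2.3 tag also makes the pyproject.toml entry agree with the one already used in requirements.txt.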
requirements.txt CHANGED
@@ -6,7 +6,7 @@ huggingface-hub>=0.24.7
 pandas==2.2.2
 python-dateutil==2.9.0
 sentencepiece==0.2.0
-transformers==4.45.1
+transformers==4.44.2
 tokenizers>=0.19.0
 gradio-space-ci @ git+https://huggingface.co/spaces/Wauplin/[email protected] # CI !!!
 isort==5.13.2