Commit f11a054 (parent: 5721477)
feat: Add scheduled collections refresh

main.py CHANGED
@@ -23,6 +23,7 @@ from data_loader import refresh_data
 login(token=os.getenv("HF_TOKEN"))
 
 UPDATE_SCHEDULE = {"hour": os.getenv("UPDATE_INTERVAL_HOURS", "*/6")}
+COLLECTION_UPDATE_SCHEDULE = {"hour": "0"}  # Run at midnight every day
 
 cache.setup("mem://?check_interval=10&size=10000")
 logger = logging.getLogger(__name__)
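Both schedules are plain dicts of cron fields that get unpacked into APScheduler's CronTrigger later in the diff. A minimal sketch of that expansion, assuming APScheduler 3.x (the triggers are printed only to show the resulting cron fields):

import os
from apscheduler.triggers.cron import CronTrigger

# Same shapes as in the diff: "*/6" = every six hours, "0" = midnight daily
UPDATE_SCHEDULE = {"hour": os.getenv("UPDATE_INTERVAL_HOURS", "*/6")}
COLLECTION_UPDATE_SCHEDULE = {"hour": "0"}

# CronTrigger takes cron fields as keyword arguments, so **dict works directly
print(CronTrigger(**UPDATE_SCHEDULE))             # e.g. cron[hour='*/6']
print(CronTrigger(**COLLECTION_UPDATE_SCHEDULE))  # e.g. cron[hour='0']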
@@ -95,11 +96,9 @@ def background_refresh_data():
 async def update_database():
     logger.info("Starting scheduled data refresh")
 
-    # Run refresh_data in a background thread
     with concurrent.futures.ThreadPoolExecutor() as executor:
         future = executor.submit(background_refresh_data)
 
-        # Wait for the background task to complete, but allow for cancellation
         try:
             datasets = await asyncio.get_event_loop().run_in_executor(
                 None, future.result
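The refresh itself is blocking, so this hunk keeps it off the event loop: the work is submitted to a thread pool and the blocking future.result call is itself awaited through run_in_executor. A self-contained sketch of the same pattern, with a hypothetical load_datasets standing in for the app's background_refresh_data:

import asyncio
import concurrent.futures
import time

def load_datasets():
    time.sleep(1)  # stands in for slow, blocking I/O
    return ["dataset-a", "dataset-b"]

async def update():
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future = executor.submit(load_datasets)
        # future.result blocks, so it is awaited via the default executor,
        # mirroring the double hop in the hunk above
        datasets = await asyncio.get_event_loop().run_in_executor(
            None, future.result
        )
    print(datasets)

asyncio.run(update())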
@@ -151,7 +150,6 @@ async def update_database():
     finally:
         conn.close()
 
-    # Upload the database file to Hugging Face Hub
     try:
         upload_file(
             path_or_fileobj="datasets.db",
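The diff truncates the upload_file call after path_or_fileobj, so every argument past that point below is an assumption. A hedged sketch using huggingface_hub.upload_file with hypothetical repo coordinates:

import os
from huggingface_hub import upload_file

upload_file(
    path_or_fileobj="datasets.db",
    path_in_repo="datasets.db",     # assumption: same filename in the repo
    repo_id="user/datasets-index",  # hypothetical repo id, not from the diff
    repo_type="dataset",            # assumption: a dataset repo
    token=os.getenv("HF_TOKEN"),
)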
@@ -187,14 +185,13 @@ async def lifespan(app: FastAPI):
     logger.info("Performing initial data refresh")
     await update_database()
 
-    # Set up the scheduler
     scheduler = AsyncIOScheduler()
-    # Schedule the update_database function using the UPDATE_SCHEDULE configuration
     scheduler.add_job(update_database, CronTrigger(**UPDATE_SCHEDULE))
-
-    scheduler.add_job(update_collections, CronTrigger(hour=0, minute=0))
+    scheduler.add_job(update_collections, CronTrigger(**COLLECTION_UPDATE_SCHEDULE))
     scheduler.start()
 
+    await update_collections()
+
     yield
 
     scheduler.shutdown()
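Taken together, the lifespan hunk does an initial refresh, registers both cron jobs, starts the scheduler, and now also warms the collections once at startup. A minimal, runnable sketch of that wiring, with no-op stand-ins for the two refresh coroutines:

from contextlib import asynccontextmanager

from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from fastapi import FastAPI

UPDATE_SCHEDULE = {"hour": "*/6"}
COLLECTION_UPDATE_SCHEDULE = {"hour": "0"}

async def update_database(): ...
async def update_collections(): ...

@asynccontextmanager
async def lifespan(app: FastAPI):
    await update_database()  # initial refresh before serving traffic
    scheduler = AsyncIOScheduler()
    scheduler.add_job(update_database, CronTrigger(**UPDATE_SCHEDULE))
    scheduler.add_job(update_collections, CronTrigger(**COLLECTION_UPDATE_SCHEDULE))
    scheduler.start()
    await update_collections()  # new in this commit: warm collections at startup
    yield  # application serves requests here
    scheduler.shutdown()

app = FastAPI(lifespan=lifespan)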
@@ -256,7 +253,6 @@ async def search_datasets(
 
     results = [dict(row) for row in c.fetchall()]
 
-    # Get total count
     if match_all:
         count_query = """
             SELECT COUNT(*) as total FROM datasets