amaye15 committed on
Commit
10656cf
1 Parent(s): 10c2fec
Files changed (1) hide show
  1. app.py +66 -14
app.py CHANGED
@@ -1,8 +1,11 @@
1
  import os
2
  import shutil
3
  import logging
4
- from huggingface_hub import WebhooksServer, WebhookPayload, webhook_endpoint
 
5
  from datasets import Dataset, load_dataset, disable_caching
 
 
6
 
7
  disable_caching()
8
 
@@ -17,9 +20,11 @@ formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(messag
17
  console_handler.setFormatter(formatter)
18
  logger.addHandler(console_handler)
19
 
 
20
  DS_NAME = "amaye15/object-segmentation"
21
  DATA_DIR = "data"
22
  TARGET_REPO = "amaye15/tmp"
 
23
 
24
 
25
  def get_data():
@@ -52,23 +57,70 @@ def process_and_push_data():
52
  logger.info("Data processed and pushed to the hub.")
53
 
54
 
55
- # Initialize the WebhooksServer
56
- app = WebhooksServer(webhook_secret="my_secret_key")
57
 
58
 
59
- @webhook_endpoint
60
- async def trigger_processing(payload: WebhookPayload):
 
 
61
  """
62
  Webhook endpoint that triggers data processing when the dataset is updated.
63
  """
64
- if payload.repo.type == "dataset" and payload.event.action == "update":
65
- logger.info(f"Dataset {payload.repo.name} updated. Triggering processing.")
66
- process_and_push_data()
67
- return {"message": "Data processing triggered successfully."}
68
- else:
69
- logger.info(f"Ignored event: {payload.event.action} on {payload.repo.name}")
70
- return {"message": "Event ignored."}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
 
73
- # Start the webhook server
74
- app.launch()
 
1
  import os
2
  import shutil
3
  import logging
4
+ from pathlib import Path
5
+ from huggingface_hub import WebhooksServer, WebhookPayload
6
  from datasets import Dataset, load_dataset, disable_caching
7
+ from fastapi import BackgroundTasks, Response, status
8
+ from huggingface_hub.utils import build_hf_headers, get_session
9
 
10
  disable_caching()
11
 
 
20
  console_handler.setFormatter(formatter)
21
  logger.addHandler(console_handler)
22
 
23
# Environment variables / module-level configuration
DS_NAME = "amaye15/object-segmentation"  # source dataset repo on the Hub
DATA_DIR = "data"  # local working directory for dataset files
TARGET_REPO = "amaye15/tmp"  # destination repo the processed dataset is pushed to
# NOTE(review): the hard-coded fallback secret is unsafe outside local dev —
# consider failing fast when HF_WEBHOOK_SECRET is unset instead of defaulting.
WEBHOOK_SECRET = os.getenv("HF_WEBHOOK_SECRET", "my_secret_key")
28
 
29
 
30
  def get_data():
 
57
  logger.info("Data processed and pushed to the hub.")
58
 
59
 
60
# Initialize the WebhooksServer with Gradio interface (if needed).
# The secret must match the webhook secret configured on the Hub; requests
# with a different secret are rejected by the server before reaching handlers.
app = WebhooksServer(webhook_secret=WEBHOOK_SECRET)
62
 
63
 
64
@app.add_webhook("/dataset_repo")
async def handle_repository_changes(
    payload: WebhookPayload, task_queue: BackgroundTasks
):
    """
    Webhook endpoint that triggers data processing when the dataset is updated.

    Schedules `_process_webhook` as a background task when the event is a
    repo-scoped change on the main branch that touches more than README.md.

    Args:
        payload: Parsed webhook payload delivered by the Hub.
        task_queue: FastAPI background-task queue, used so the webhook can
            respond immediately while processing runs out of band.

    Returns:
        Plain-text ``Response``: 202 when processing was scheduled, 200 when
        the event was deliberately ignored, 501 when inspecting the commit
        diff failed unexpectedly.
    """
    # Only repo-scoped events (content/config changes) are relevant.
    if not payload.event.scope.startswith("repo"):
        return Response("No task scheduled", status_code=status.HTTP_200_OK)

    # Only run if the change is on the main branch. `updatedRefs` can be
    # empty (e.g. settings-only events), so guard explicitly instead of
    # catching a broad Exception around the index access.
    updated_refs = getattr(payload, "updatedRefs", None) or []
    if not updated_refs or updated_refs[0].ref != "refs/heads/main":
        response_content = "No task scheduled: Change not on main branch"
        logger.info(response_content)
        return Response(response_content, status_code=status.HTTP_200_OK)

    # No need to run for README-only updates: diff the two commits via the
    # Hub's compare endpoint and inspect the changed file paths.
    try:
        commit_files_url = (
            f"{payload.repo.url.api}/compare/"
            f"{updated_refs[0].oldSha}..{updated_refs[0].newSha}?raw=true"
        )
        response_text = (
            get_session().get(commit_files_url, headers=build_hf_headers()).text
        )
        logger.info(f"Git Compare URL: {commit_files_url}")

        # One "<status>\t<path>" line per changed file; keep the path part.
        file_lines = response_text.split("\n")
        changed_files = [line.split("\t")[-1] for line in file_lines if line.strip()]
        logger.info(f"Changed files: {changed_files}")

        # Checking if only README.md has been changed.
        # NOTE: all() over an empty list is True, so an empty diff is also
        # treated as a README-only update and ignored (preserved behavior).
        if all("README.md" in file for file in changed_files):
            response_content = "No task scheduled: It's a README only update."
            logger.info(response_content)
            return Response(response_content, status_code=status.HTTP_200_OK)
    except Exception:
        # Surface the failure (with traceback) rather than silently
        # processing on a diff we could not inspect.
        logger.exception("Error checking files")
        return Response("Unexpected issue", status_code=status.HTTP_501_NOT_IMPLEMENTED)

    logger.info(
        f"Webhook received from {payload.repo.name} indicating a repo {payload.event.action}"
    )
    task_queue.add_task(_process_webhook)
    return Response("Task scheduled.", status_code=status.HTTP_202_ACCEPTED)
113
+
114
+
115
def _process_webhook():
    """Background task: reload the source dataset, then process and push it.

    Runs outside the request/response cycle via FastAPI's BackgroundTasks,
    so the webhook endpoint can acknowledge immediately.
    """
    # NOTE(review): the loaded dataset is not used here — the result was
    # previously assigned to an unused local. The call is kept to preserve
    # existing behavior (it pre-fetches/validates the repo); confirm whether
    # process_and_push_data() makes it redundant.
    logger.info("Loading new dataset...")
    load_dataset(DS_NAME)
    logger.info("Loaded new dataset")

    logger.info("Processing and updating dataset...")
    process_and_push_data()
    logger.info("Processing and updating dataset completed!")
123
 
124
 
125
if __name__ == "__main__":
    # Serve the webhook app on all interfaces, port 7860 (HF Spaces default),
    # surfacing server errors in the UI for easier debugging.
    app.launch(server_port=7860, show_error=True, server_name="0.0.0.0")