Richard Guo committed on
Commit
1779f92
·
1 Parent(s): 036b5da

nomic login

Browse files
Files changed (4) hide show
  1. Dockerfile +3 -3
  2. build_map.py +11 -6
  3. main.py +20 -18
  4. templates/form.html +3 -0
Dockerfile CHANGED
@@ -26,9 +26,9 @@ RUN pip install --no-cache-dir -r requirements.txt
26
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
27
  COPY --chown=user . $HOME/app
28
 
29
- # Expose the secret NOMIC_API_KEY at buildtime and use its value
30
- RUN --mount=type=secret,id=NOMIC_API_KEY,mode=0444,required=true \
31
- nomic login $(cat /run/secrets/NOMIC_API_KEY)
32
 
33
  # Make port 7860 available to the world outside this container
34
  EXPOSE 7860
 
26
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
27
  COPY --chown=user . $HOME/app
28
 
29
+ # # Expose the secret NOMIC_API_KEY at buildtime and use its value
30
+ # RUN --mount=type=secret,id=NOMIC_API_KEY,mode=0444,required=true \
31
+ # nomic login $(cat /run/secrets/NOMIC_API_KEY)
32
 
33
  # Make port 7860 available to the world outside this container
34
  EXPOSE 7860
build_map.py CHANGED
@@ -110,6 +110,7 @@ def load_dataset_and_metadata(dataset_name,
110
 
111
 
112
  def upload_dataset_to_atlas(dataset_dict,
 
113
  project_name = None,
114
  unique_id_field_name=None,
115
  indexed_field = None,
@@ -117,6 +118,7 @@ def upload_dataset_to_atlas(dataset_dict,
117
  organization_name=None,
118
  wait_for_map=True,
119
  datum_limit=30000):
 
120
 
121
  if modality is None:
122
  modality = "text"
@@ -140,15 +142,18 @@ def upload_dataset_to_atlas(dataset_dict,
140
  uncategorized_fields = get_datum_fields(dataset_dict)
141
 
142
 
143
- # return longest string field
144
  if indexed_field is None:
145
- ex = dataset_dict["head"].take([0])
146
- longest_len = 0
147
  for field in string_fields:
148
- if ex[field] and len(ex[field]) > longest_len:
 
 
 
 
 
 
149
  indexed_field = field
150
- longest_len = len(ex[field])
151
-
152
 
153
  topic_label_field = None
154
  if modality == "embedding":
 
110
 
111
 
112
  def upload_dataset_to_atlas(dataset_dict,
113
+ atlas_api_token: str,
114
  project_name = None,
115
  unique_id_field_name=None,
116
  indexed_field = None,
 
118
  organization_name=None,
119
  wait_for_map=True,
120
  datum_limit=30000):
121
+ nomic.login(atlas_api_token)
122
 
123
  if modality is None:
124
  modality = "text"
 
142
  uncategorized_fields = get_datum_fields(dataset_dict)
143
 
144
 
145
+ # return longest string field from 5 samples
146
  if indexed_field is None:
147
+ longest_length = 0
 
148
  for field in string_fields:
149
+ length = 0
150
+ for i in range(len(dataset_dict["head"])):
151
+ ex = dataset_dict["head"].take([i])
152
+ if ex[field]:
153
+ length += len(ex[field].split())
154
+ if length > longest_length:
155
+ longest_length = length
156
  indexed_field = field
 
 
157
 
158
  topic_label_field = None
159
  if modality == "embedding":
main.py CHANGED
@@ -12,7 +12,7 @@ from huggingface_hub import create_discussion, comment_discussion
12
  from build_map import load_dataset_and_metadata, upload_dataset_to_atlas
13
  from models import WebhookPayload
14
 
15
- WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET")
16
  HUGGINGFACE_ACCESS_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
17
 
18
  app = FastAPI()
@@ -20,12 +20,13 @@ app = FastAPI()
20
  tasks = {}
21
  templates = Jinja2Templates(directory="templates")
22
 
23
- def upload_atlas_task(task_id,
24
- dataset_name,
 
25
  webhook_payload: WebhookPayload = None,
26
  webhook_notify: bool = False):
27
  dataset_dict = load_dataset_and_metadata(dataset_name)
28
- map_url = upload_dataset_to_atlas(dataset_dict)
29
  tasks[task_id]['status'] = 'done'
30
  tasks[task_id]['url'] = map_url
31
  tasks[task_id]['finish_time'] = time.time()
@@ -61,18 +62,18 @@ async def cleanup_tasks():
61
  del tasks[task_id]
62
  await asyncio.sleep(1800) # Wait for 30 minutes
63
 
64
- # @app.get("/", response_class=HTMLResponse)
65
- # async def read_form(request: Request):
66
- # # Render the form.html template
67
- # return templates.TemplateResponse("form.html", {"request": request})
68
 
69
- # @app.post("/submit_form")
70
- # async def form_post(background_tasks: BackgroundTasks, dataset_name: str = Form(...)):
71
- # task_id = str(uuid4())
72
- # tasks[task_id] = {'status': 'running'}
73
- # #form_data = DatasetForm(dataset_name=dataset_name)
74
- # background_tasks.add_task(upload_atlas_task, task_id, dataset_name)
75
- # return {'task_id': task_id}
76
 
77
  @app.get("/status/{task_id}")
78
  async def read_task(task_id: str):
@@ -85,8 +86,9 @@ async def read_task(task_id: str):
85
  async def post_webhook(background_tasks: BackgroundTasks, payload: WebhookPayload, x_webhook_secret: Optional[str] = Header(default=None)):
86
  if x_webhook_secret is None:
87
  raise HTTPException(401)
88
- if x_webhook_secret != WEBHOOK_SECRET:
89
- raise HTTPException(403)
 
90
 
91
  if not (
92
  payload.event.action == "update"
@@ -98,5 +100,5 @@ async def post_webhook(background_tasks: BackgroundTasks, payload: WebhookPayloa
98
  task_id = str(uuid4())
99
  tasks[task_id] = {'status': 'running'}
100
  #form_data = DatasetForm(dataset_name=dataset_name)
101
- background_tasks.add_task(upload_atlas_task, task_id, payload.repo.name, payload, True)
102
  return {'task_id': task_id}
 
12
  from build_map import load_dataset_and_metadata, upload_dataset_to_atlas
13
  from models import WebhookPayload
14
 
15
+ # WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET")
16
  HUGGINGFACE_ACCESS_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
17
 
18
  app = FastAPI()
 
20
  tasks = {}
21
  templates = Jinja2Templates(directory="templates")
22
 
23
+ def upload_atlas_task(task_id: str,
24
+ dataset_name: str,
25
+ atlas_api_token: str,
26
  webhook_payload: WebhookPayload = None,
27
  webhook_notify: bool = False):
28
  dataset_dict = load_dataset_and_metadata(dataset_name)
29
+ map_url = upload_dataset_to_atlas(dataset_dict, atlas_api_token)
30
  tasks[task_id]['status'] = 'done'
31
  tasks[task_id]['url'] = map_url
32
  tasks[task_id]['finish_time'] = time.time()
 
62
  del tasks[task_id]
63
  await asyncio.sleep(1800) # Wait for 30 minutes
64
 
65
+ @app.get("/", response_class=HTMLResponse)
66
+ async def read_form(request: Request):
67
+ # Render the form.html template
68
+ return templates.TemplateResponse("form.html", {"request": request})
69
 
70
+ @app.post("/submit_form")
71
+ async def form_post(background_tasks: BackgroundTasks, dataset_name: str = Form(...), atlas_api_token: str = Form(...)):
72
+ task_id = str(uuid4())
73
+ tasks[task_id] = {'status': 'running'}
74
+ #form_data = DatasetForm(dataset_name=dataset_name)
75
+ background_tasks.add_task(upload_atlas_task, task_id, dataset_name, atlas_api_token)
76
+ return {'task_id': task_id}
77
 
78
  @app.get("/status/{task_id}")
79
  async def read_task(task_id: str):
 
86
  async def post_webhook(background_tasks: BackgroundTasks, payload: WebhookPayload, x_webhook_secret: Optional[str] = Header(default=None)):
87
  if x_webhook_secret is None:
88
  raise HTTPException(401)
89
+
90
+ # if x_webhook_secret != WEBHOOK_SECRET:
91
+ # raise HTTPException(403)
92
 
93
  if not (
94
  payload.event.action == "update"
 
100
  task_id = str(uuid4())
101
  tasks[task_id] = {'status': 'running'}
102
  #form_data = DatasetForm(dataset_name=dataset_name)
103
+ background_tasks.add_task(upload_atlas_task, task_id, payload.repo.name, x_webhook_secret, payload, True)
104
  return {'task_id': task_id}
templates/form.html CHANGED
@@ -58,6 +58,9 @@
58
  <div class="form-group">
59
  <label for="dataset_name">Dataset Name:</label>
60
  <input type="text" class="form-control" id="dataset_name" name="dataset_name">
 
 
 
61
  </div>
62
  <button type="submit" class="btn btn-primary">Submit</button>
63
  </form>
 
58
  <div class="form-group">
59
  <label for="dataset_name">Dataset Name:</label>
60
  <input type="text" class="form-control" id="dataset_name" name="dataset_name">
61
+
62
+ <label for="atlas_api_token">Atlas API Token:</label>
63
+ <input type="text" class="form-control" id="atlas_api_token" name="atlas_api_token">
64
  </div>
65
  <button type="submit" class="btn btn-primary">Submit</button>
66
  </form>