Richard Guo
committed on
Commit
·
1779f92
1
Parent(s):
036b5da
nomic login
Browse files
- Dockerfile +3 -3
- build_map.py +11 -6
- main.py +20 -18
- templates/form.html +3 -0
Dockerfile
CHANGED
@@ -26,9 +26,9 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
26 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
27 |
COPY --chown=user . $HOME/app
|
28 |
|
29 |
-
# Expose the secret NOMIC_API_KEY at buildtime and use its value
|
30 |
-
RUN --mount=type=secret,id=NOMIC_API_KEY,mode=0444,required=true \
|
31 |
-
|
32 |
|
33 |
# Make port 7860 available to the world outside this container
|
34 |
EXPOSE 7860
|
|
|
26 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
27 |
COPY --chown=user . $HOME/app
|
28 |
|
29 |
+
# # Expose the secret NOMIC_API_KEY at buildtime and use its value
|
30 |
+
# RUN --mount=type=secret,id=NOMIC_API_KEY,mode=0444,required=true \
|
31 |
+
# nomic login $(cat /run/secrets/NOMIC_API_KEY)
|
32 |
|
33 |
# Make port 7860 available to the world outside this container
|
34 |
EXPOSE 7860
|
build_map.py
CHANGED
@@ -110,6 +110,7 @@ def load_dataset_and_metadata(dataset_name,
|
|
110 |
|
111 |
|
112 |
def upload_dataset_to_atlas(dataset_dict,
|
|
|
113 |
project_name = None,
|
114 |
unique_id_field_name=None,
|
115 |
indexed_field = None,
|
@@ -117,6 +118,7 @@ def upload_dataset_to_atlas(dataset_dict,
|
|
117 |
organization_name=None,
|
118 |
wait_for_map=True,
|
119 |
datum_limit=30000):
|
|
|
120 |
|
121 |
if modality is None:
|
122 |
modality = "text"
|
@@ -140,15 +142,18 @@ def upload_dataset_to_atlas(dataset_dict,
|
|
140 |
uncategorized_fields = get_datum_fields(dataset_dict)
|
141 |
|
142 |
|
143 |
-
# return longest string field
|
144 |
if indexed_field is None:
|
145 |
-
|
146 |
-
longest_len = 0
|
147 |
for field in string_fields:
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
indexed_field = field
|
150 |
-
longest_len = len(ex[field])
|
151 |
-
|
152 |
|
153 |
topic_label_field = None
|
154 |
if modality == "embedding":
|
|
|
110 |
|
111 |
|
112 |
def upload_dataset_to_atlas(dataset_dict,
|
113 |
+
atlas_api_token: str,
|
114 |
project_name = None,
|
115 |
unique_id_field_name=None,
|
116 |
indexed_field = None,
|
|
|
118 |
organization_name=None,
|
119 |
wait_for_map=True,
|
120 |
datum_limit=30000):
|
121 |
+
nomic.login(atlas_api_token)
|
122 |
|
123 |
if modality is None:
|
124 |
modality = "text"
|
|
|
142 |
uncategorized_fields = get_datum_fields(dataset_dict)
|
143 |
|
144 |
|
145 |
+
# return longest string field from 5 samples
|
146 |
if indexed_field is None:
|
147 |
+
longest_length = 0
|
|
|
148 |
for field in string_fields:
|
149 |
+
length = 0
|
150 |
+
for i in range(len(dataset_dict["head"])):
|
151 |
+
ex = dataset_dict["head"].take([i])
|
152 |
+
if ex[field]:
|
153 |
+
length += len(ex[field].split())
|
154 |
+
if length > longest_length:
|
155 |
+
longest_length = length
|
156 |
indexed_field = field
|
|
|
|
|
157 |
|
158 |
topic_label_field = None
|
159 |
if modality == "embedding":
|
main.py
CHANGED
@@ -12,7 +12,7 @@ from huggingface_hub import create_discussion, comment_discussion
|
|
12 |
from build_map import load_dataset_and_metadata, upload_dataset_to_atlas
|
13 |
from models import WebhookPayload
|
14 |
|
15 |
-
WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET")
|
16 |
HUGGINGFACE_ACCESS_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
|
17 |
|
18 |
app = FastAPI()
|
@@ -20,12 +20,13 @@ app = FastAPI()
|
|
20 |
tasks = {}
|
21 |
templates = Jinja2Templates(directory="templates")
|
22 |
|
23 |
-
def upload_atlas_task(task_id,
|
24 |
-
dataset_name,
|
|
|
25 |
webhook_payload: WebhookPayload = None,
|
26 |
webhook_notify: bool = False):
|
27 |
dataset_dict = load_dataset_and_metadata(dataset_name)
|
28 |
-
map_url = upload_dataset_to_atlas(dataset_dict)
|
29 |
tasks[task_id]['status'] = 'done'
|
30 |
tasks[task_id]['url'] = map_url
|
31 |
tasks[task_id]['finish_time'] = time.time()
|
@@ -61,18 +62,18 @@ async def cleanup_tasks():
|
|
61 |
del tasks[task_id]
|
62 |
await asyncio.sleep(1800) # Wait for 30 minutes
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
#
|
67 |
-
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
#
|
74 |
-
|
75 |
-
|
76 |
|
77 |
@app.get("/status/{task_id}")
|
78 |
async def read_task(task_id: str):
|
@@ -85,8 +86,9 @@ async def read_task(task_id: str):
|
|
85 |
async def post_webhook(background_tasks: BackgroundTasks, payload: WebhookPayload, x_webhook_secret: Optional[str] = Header(default=None)):
|
86 |
if x_webhook_secret is None:
|
87 |
raise HTTPException(401)
|
88 |
-
|
89 |
-
|
|
|
90 |
|
91 |
if not (
|
92 |
payload.event.action == "update"
|
@@ -98,5 +100,5 @@ async def post_webhook(background_tasks: BackgroundTasks, payload: WebhookPayloa
|
|
98 |
task_id = str(uuid4())
|
99 |
tasks[task_id] = {'status': 'running'}
|
100 |
#form_data = DatasetForm(dataset_name=dataset_name)
|
101 |
-
background_tasks.add_task(upload_atlas_task, task_id, payload.repo.name, payload, True)
|
102 |
return {'task_id': task_id}
|
|
|
12 |
from build_map import load_dataset_and_metadata, upload_dataset_to_atlas
|
13 |
from models import WebhookPayload
|
14 |
|
15 |
+
# WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET")
|
16 |
HUGGINGFACE_ACCESS_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
|
17 |
|
18 |
app = FastAPI()
|
|
|
20 |
tasks = {}
|
21 |
templates = Jinja2Templates(directory="templates")
|
22 |
|
23 |
+
def upload_atlas_task(task_id: str,
|
24 |
+
dataset_name: str,
|
25 |
+
atlas_api_token: str,
|
26 |
webhook_payload: WebhookPayload = None,
|
27 |
webhook_notify: bool = False):
|
28 |
dataset_dict = load_dataset_and_metadata(dataset_name)
|
29 |
+
map_url = upload_dataset_to_atlas(dataset_dict, atlas_api_token)
|
30 |
tasks[task_id]['status'] = 'done'
|
31 |
tasks[task_id]['url'] = map_url
|
32 |
tasks[task_id]['finish_time'] = time.time()
|
|
|
62 |
del tasks[task_id]
|
63 |
await asyncio.sleep(1800) # Wait for 30 minutes
|
64 |
|
65 |
+
@app.get("/", response_class=HTMLResponse)
|
66 |
+
async def read_form(request: Request):
|
67 |
+
# Render the form.html template
|
68 |
+
return templates.TemplateResponse("form.html", {"request": request})
|
69 |
|
70 |
+
@app.post("/submit_form")
|
71 |
+
async def form_post(background_tasks: BackgroundTasks, dataset_name: str = Form(...), atlas_api_token: str = Form(...)):
|
72 |
+
task_id = str(uuid4())
|
73 |
+
tasks[task_id] = {'status': 'running'}
|
74 |
+
#form_data = DatasetForm(dataset_name=dataset_name)
|
75 |
+
background_tasks.add_task(upload_atlas_task, task_id, dataset_name, atlas_api_token)
|
76 |
+
return {'task_id': task_id}
|
77 |
|
78 |
@app.get("/status/{task_id}")
|
79 |
async def read_task(task_id: str):
|
|
|
86 |
async def post_webhook(background_tasks: BackgroundTasks, payload: WebhookPayload, x_webhook_secret: Optional[str] = Header(default=None)):
|
87 |
if x_webhook_secret is None:
|
88 |
raise HTTPException(401)
|
89 |
+
|
90 |
+
# if x_webhook_secret != WEBHOOK_SECRET:
|
91 |
+
# raise HTTPException(403)
|
92 |
|
93 |
if not (
|
94 |
payload.event.action == "update"
|
|
|
100 |
task_id = str(uuid4())
|
101 |
tasks[task_id] = {'status': 'running'}
|
102 |
#form_data = DatasetForm(dataset_name=dataset_name)
|
103 |
+
background_tasks.add_task(upload_atlas_task, task_id, payload.repo.name, x_webhook_secret, payload, True)
|
104 |
return {'task_id': task_id}
|
templates/form.html
CHANGED
@@ -58,6 +58,9 @@
|
|
58 |
<div class="form-group">
|
59 |
<label for="dataset_name">Dataset Name:</label>
|
60 |
<input type="text" class="form-control" id="dataset_name" name="dataset_name">
|
|
|
|
|
|
|
61 |
</div>
|
62 |
<button type="submit" class="btn btn-primary">Submit</button>
|
63 |
</form>
|
|
|
58 |
<div class="form-group">
|
59 |
<label for="dataset_name">Dataset Name:</label>
|
60 |
<input type="text" class="form-control" id="dataset_name" name="dataset_name">
|
61 |
+
|
62 |
+
<label for="atlas_api_token">Atlas API Token:</label>
|
63 |
+
<input type="text" class="form-control" id="atlas_api_token" name="atlas_api_token">
|
64 |
</div>
|
65 |
<button type="submit" class="btn btn-primary">Submit</button>
|
66 |
</form>
|