Richard Guo committed on
Commit
442f97c
·
1 Parent(s): 9a64205

working basic app + dockerfile

Browse files
Files changed (7) hide show
  1. .gitignore +160 -0
  2. Dockerfile +9 -4
  3. app.py +0 -36
  4. build_map.py +8 -3
  5. form.html +0 -11
  6. main.py +57 -0
  7. templates/form.html +98 -0
.gitignore ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
Dockerfile CHANGED
@@ -1,14 +1,19 @@
1
  # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
  # you will also find guides on how best to write your Dockerfile
3
 
 
4
  FROM python:3.9
5
 
6
- WORKDIR /code
 
7
 
8
- COPY ./requirements.txt /code/requirements.txt
 
9
 
10
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
11
 
12
- COPY . .
 
13
 
14
  CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
  # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
  # you will also find guides on how best to write your Dockerfile
3
 
4
+ # Use an official Python runtime as a parent image
5
  FROM python:3.9
6
 
7
+ # Set the working directory in the container to /app
8
+ WORKDIR /app
9
 
10
+ # Add the current directory contents into the container at /app
11
+ ADD . /app
12
 
13
+ # Install any needed packages specified in requirements.txt
14
+ RUN pip install --no-cache-dir -r requirements.txt
15
 
16
+ # Make port 7860 available to the world outside this container
17
+ EXPOSE 7860
18
 
19
  CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app.py DELETED
@@ -1,36 +0,0 @@
1
- from fastapi import FastAPI, Request, WebSocket
2
- from fastapi.responses import HTMLResponse
3
- from fastapi.templating import Jinja2Templates
4
- from typing import Optional
5
-
6
-
7
-
8
- from pydantic import BaseModel
9
-
10
-
11
- app = FastAPI()
12
- templates = Jinja2Templates(directory="templates")
13
-
14
-
15
- # Create a Pydantic model for the form data
16
- class DatasetForm(BaseModel):
17
- dataset_name: str
18
-
19
-
20
-
21
- def long_running_function():
22
- pass
23
-
24
- @app.get("/", response_class=HTMLResponse)
25
- async def read_form(request: Request):
26
- # Render the form.html template
27
- return templates.TemplateResponse("form.html", {"request": request})
28
-
29
- @app.post("/submit_form")
30
- async def form_post(form_data: DatasetForm):
31
- # Do something with form_data
32
- print(form_data.dict())
33
- return {"message": "Form data received!", "received_data": form_data.dict()}
34
-
35
-
36
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build_map.py CHANGED
@@ -109,12 +109,13 @@ def load_dataset_and_metadata(dataset_name,
109
  return dataset_dict
110
 
111
 
112
- def upload_project_to_atlas(dataset_dict,
113
  project_name = None,
114
  unique_id_field_name=None,
115
  indexed_field = None,
116
  modality=None,
117
- organization_name=None):
 
118
 
119
  if modality is None:
120
  modality = "text"
@@ -231,6 +232,10 @@ def upload_project_to_atlas(dataset_dict,
231
  topic_label_field = topic_label_field,
232
  build_topic_model=True)
233
 
 
 
 
 
234
  return projection.map_link
235
 
236
  # Run test
@@ -240,4 +245,4 @@ if __name__ == "__main__":
240
  project_name = "huggingface_auto_upload_test-dolly-15k"
241
 
242
  dataset_dict = load_dataset_and_metadata(dataset_name)
243
- print(upload_project_to_atlas(dataset_dict, project_name=project_name))
 
109
  return dataset_dict
110
 
111
 
112
+ def upload_dataset_to_atlas(dataset_dict,
113
  project_name = None,
114
  unique_id_field_name=None,
115
  indexed_field = None,
116
  modality=None,
117
+ organization_name=None,
118
+ wait_for_map=True):
119
 
120
  if modality is None:
121
  modality = "text"
 
232
  topic_label_field = topic_label_field,
233
  build_topic_model=True)
234
 
235
+ if wait_for_map:
236
+ with proj.wait_for_project_lock():
237
+ time.sleep(1)
238
+
239
  return projection.map_link
240
 
241
  # Run test
 
245
  project_name = "huggingface_auto_upload_test-dolly-15k"
246
 
247
  dataset_dict = load_dataset_and_metadata(dataset_name)
248
+ print(upload_dataset_to_atlas(dataset_dict, project_name=project_name))
form.html DELETED
@@ -1,11 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <body>
4
- <h2>HTML Form</h2>
5
- <form action="/submit_form" method="post">
6
- <label for="name">Name:</label><br>
7
- <input type="text" id="dataset_name" name="dataset_name" value=""><br>
8
- <input type="submit" value="Submit">
9
- </form>
10
- </body>
11
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
main.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Form, Request, BackgroundTasks
2
+ from fastapi.responses import HTMLResponse
3
+ from fastapi.templating import Jinja2Templates
4
+ #from pydantic import BaseModel
5
+
6
+ from uuid import uuid4
7
+ import asyncio
8
+
9
+ from build_map import load_dataset_and_metadata, upload_dataset_to_atlas
10
+
11
+
12
+ app = FastAPI()
13
+ # TODO: use task management queue
14
+ tasks = {}
15
+ templates = Jinja2Templates(directory="templates")
16
+
17
def upload_atlas_task(task_id, dataset_name):
    """Build an Atlas map for ``dataset_name`` and record the result in ``tasks``.

    On success the task's entry gets the map URL, a ``'done'`` status and a
    ``finish_time`` timestamp (seconds since the epoch), which the periodic
    cleanup loop needs in order to age the entry out.

    Args:
        task_id: Key of the entry in the module-level ``tasks`` dict.
        dataset_name: Name passed to ``load_dataset_and_metadata``.
    """
    import time  # local import: this module has no top-level ``import time``

    dataset_dict = load_dataset_and_metadata(dataset_name)
    map_url = upload_dataset_to_atlas(dataset_dict, project_name="atlas-space-test")

    # NOTE: if either call above raises, nothing below runs and the entry
    # keeps its 'running' status.
    # Write all fields in one update so a status poll never observes
    # 'done' without the accompanying URL.
    tasks[task_id].update(
        status='done',
        url=map_url,
        finish_time=time.time(),
    )
22
+
23
@app.on_event("startup")
async def startup_event():
    """Start the periodic task-cleanup loop when the application starts."""
    # The event loop only holds weak references to tasks; keep a strong
    # reference on the app so the cleanup loop cannot be garbage-collected
    # while it is still running.
    app.state.cleanup_task = asyncio.create_task(cleanup_tasks())
26
+
27
async def cleanup_tasks():
    """Periodically evict finished entries from the in-memory ``tasks`` dict.

    Sweeps once immediately and then every 30 minutes. An entry is removed
    when its status is ``'done'`` and its ``finish_time`` is more than
    30 minutes in the past. Entries without a ``finish_time`` are treated as
    having just finished and are kept. This coroutine never returns.
    """
    import time  # local import: this module has no top-level ``import time``

    max_age_seconds = 1800  # 30 minutes
    sweep_interval_seconds = 1800  # 30 minutes

    while True:
        now = time.time()
        # Collect the keys first: deleting from the dict while iterating
        # over it would raise a RuntimeError.
        expired_ids = [
            task_id
            for task_id, task in tasks.items()
            if task['status'] == 'done'
            and now - task.get('finish_time', now) > max_age_seconds
        ]
        for task_id in expired_ids:
            del tasks[task_id]
        await asyncio.sleep(sweep_interval_seconds)
37
+
38
@app.get("/", response_class=HTMLResponse)
async def read_form(request: Request):
    """Serve the dataset submission page rendered from ``form.html``."""
    context = {"request": request}
    return templates.TemplateResponse("form.html", context)
42
+
43
@app.post("/submit_form")
async def form_post(background_tasks: BackgroundTasks, dataset_name: str = Form(...)):
    """Register a new map-building task and schedule it in the background.

    A fresh UUID4 string becomes the task id; its entry in ``tasks`` starts
    out as ``{'status': 'running'}`` before ``upload_atlas_task`` is queued.

    Returns:
        A dict ``{'task_id': <id>}`` the client can use to poll for status.
    """
    new_task_id = str(uuid4())
    # Create the entry before queuing the work so the worker can update it.
    tasks[new_task_id] = dict(status='running')
    background_tasks.add_task(upload_atlas_task, new_task_id, dataset_name)
    return dict(task_id=new_task_id)
51
+
52
@app.get("/status/{task_id}")
async def read_task(task_id: str):
    """Return the stored record for ``task_id``.

    Unknown ids yield ``{'status': 'not found'}`` instead of an error.
    """
    try:
        return tasks[task_id]
    except KeyError:
        return {'status': 'not found'}
templates/form.html ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
6
+ <title>Atlas Map</title>
7
+
8
+ <!-- CSS only -->
9
+ <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" rel="stylesheet">
10
+
11
+ <style>
12
+ body {
13
+ display: flex;
14
+ justify-content: center;
15
+ align-items: center;
16
+ height: 100vh;
17
+ background-color: #f5f5f5;
18
+ }
19
+
20
+ .form-container {
21
+ background-color: white;
22
+ padding: 30px;
23
+ border-radius: 10px;
24
+ box-shadow: 0px 0px 10px 0px rgba(0,0,0,0.1);
25
+ }
26
+
27
+ .form-container h2 {
28
+ margin-bottom: 30px;
29
+ }
30
+
31
+ #loading {
32
+ position: absolute;
33
+ top: 0;
34
+ left: 0;
35
+ right: 0;
36
+ bottom: 0;
37
+ background-color: rgba(0, 0, 0, 0.5);
38
+ display: flex;
39
+ justify-content: center;
40
+ align-items: center;
41
+ }
42
+
43
+ .loading-content {
44
+ padding: 20px;
45
+ background-color: white;
46
+ border-radius: 5px;
47
+ font-size: 20px;
48
+ color: #333;
49
+ box-shadow: 0px 0px 10px 0px rgba(0,0,0,0.1);
50
+ }
51
+ </style>
52
+ </head>
53
+ <body>
54
+
55
+ <div class="container form-container">
56
+ <h2>HuggingFace Dataset to Atlas Map</h2>
57
+ <form id="myForm" action="/submit_form" method="post">
58
+ <div class="form-group">
59
+ <label for="dataset_name">Dataset Name:</label>
60
+ <input type="text" class="form-control" id="dataset_name" name="dataset_name">
61
+ </div>
62
+ <button type="submit" class="btn btn-primary">Submit</button>
63
+ </form>
64
+
65
+ <div id="loading" style="display: none;">
66
+ <div class="loading-content">
67
+ Building map...
68
+ </div>
69
+ </div>
70
+ </div>
71
+
72
+ <script>
73
// Intercept the form submission, send it with fetch, and start polling for the result.
window.onload = function() {
    document.getElementById('myForm').onsubmit = async function(e) {
        e.preventDefault(); // Keep the browser from doing a full-page form POST

        const loading = document.getElementById('loading');
        // '#loading' is styled as a flex container so its content is centred;
        // 'block' would override that rule and lose the centring.
        loading.style.display = 'flex';

        try {
            const formData = new FormData(this);
            const response = await fetch('/submit_form', {method: 'POST', body: formData});
            if (!response.ok) {
                // e.g. a validation error: there is no task_id to poll for
                throw new Error(`server responded with HTTP ${response.status}`);
            }
            const data = await response.json();
            checkStatus(data.task_id);
        } catch (err) {
            loading.style.display = 'none';
            alert(`Could not start the map build: ${err.message}`);
        }
    };
}
85
+
86
// Poll /status/<taskId> every 5 seconds until the task leaves the 'running'
// state, then redirect to the finished map. Any other outcome (unknown task,
// missing URL, HTTP or network error) hides the overlay and reports the problem
// instead of navigating to an undefined URL.
async function checkStatus(taskId) {
    const loading = document.getElementById('loading');

    let data;
    try {
        const response = await fetch(`/status/${taskId}`);
        if (!response.ok) {
            throw new Error(`server responded with HTTP ${response.status}`);
        }
        data = await response.json();
    } catch (err) {
        loading.style.display = 'none';
        alert(`Could not check the map build status: ${err.message}`);
        return;
    }

    if (data.status === 'running') {
        setTimeout(() => checkStatus(taskId), 5000); // Check again in 5 seconds
    } else if (data.status === 'done' && data.url) {
        window.location.href = data.url; // Redirect to the finished map
    } else {
        // e.g. {status: 'not found'}: there is no URL to redirect to
        loading.style.display = 'none';
        alert(`Map build did not complete (status: ${data.status}).`);
    }
}
95
+ </script>
96
+
97
+ </body>
98
+ </html>