justram committed on
Commit
fef9918
1 Parent(s): 5e126b0

Upload folder using huggingface_hub

Browse files
Files changed (8) hide show
  1. .dockerignore +26 -0
  2. Dockerfile +20 -0
  3. app.py +42 -0
  4. bokeh_plot.py +59 -0
  5. config.toml +3 -0
  6. contour_data.pkl +3 -0
  7. processed_data.pkl +3 -0
  8. requirements.txt +5 -0
.dockerignore ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore Python cache files
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+
6
+ # Ignore virtual environments
7
+ venv/
8
+ env/
9
+ .venv/
10
+
11
+ # Ignore version control system directories
12
+ .git/
13
+ .gitignore
14
+
15
+ # Ignore Docker-related files
16
+ Dockerfile
17
+ .dockerignore
18
+
19
+ # Ignore IDE/editor-specific files
20
+ .vscode/
21
+ .idea/
22
+
23
+ # Ignore any additional temporary files
24
+ *.tmp
25
+ *.log
26
+ *.swp
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python image from the Docker Hub
FROM python:3.8-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements.txt file into the container first so the dependency
# layer is reused when only application code changes
COPY requirements.txt .

# Install the dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container
COPY . .

# Streamlit reads per-project settings from .streamlit/config.toml under the
# working directory; a config.toml at the project root is not picked up, so
# place a copy where Streamlit looks for it.
COPY config.toml .streamlit/config.toml

# Expose the port the app runs on
EXPOSE 8501

# Command to run the Streamlit app. The Streamlit script added alongside this
# Dockerfile is app.py, so that is the entry point to launch.
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import streamlit as st
3
+ from streamlit_lottie import st_lottie
4
+ from bokeh.embed import components
5
+ from bokeh_plot import create_plot
6
+
7
@st.cache_data()
def load_lottieurl(url: str, timeout: float = 10.0):
    """Download a Lottie animation and return its parsed JSON.

    Args:
        url: Address of the Lottie JSON document.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        server answers with a status other than 200, or the body is not
        valid JSON.
    """
    try:
        # Without a timeout a stalled server would block the page forever.
        r = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        # Raised (directly or via a subclass) when the body is not JSON.
        return None
13
+
14
# Markdown shown next to the animation at the top of the page.
# NOTE(review): the page title says "Sampled 50k" while this text says 25k are
# visualized -- confirm which figure is current.
_INTRO_MARKDOWN = """
## AToMiC Image Explorer
### Subsampled AToMiC Images using [CLIP-ViT-BigG](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)
- **Subsampling Procedure:** Hierarchical K-Means [10, 10, 10, 10], randomly sampled 50 from the leaf clusters -> random sample 25k for visualization.
- Original [Image Collection](https://huggingface.co/datasets/TREC-AToMiC/AToMiC-Images-v0.2)
- Prebuilt [Embeddings/Index](https://huggingface.co/datasets/TREC-AToMiC/AToMiC-Baselines/tree/main/indexes)
- Questions? Leave an issue at our [repo](https://github.com/TREC-AToMiC/AToMiC).
- It takes a few minutes to render the plot.
"""

st.set_page_config(
    page_title="AToMiC2024 Images (Sampled 50k)",
    page_icon="⚛️",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={'About': '## UMAP Embeddings of AToMiC2024 images'},
)

if __name__ == "__main__":
    # Narrow column for the animation, wide column for the description.
    col1, col2 = st.columns([0.15, 0.85])
    with col1:
        lottie = load_lottieurl("https://lottie.host/de47fd4c-99cb-48a7-ae10-59d4eb8e4dbe/bXMpZN95tA.json")
        # load_lottieurl returns None when the download fails; skip the
        # animation in that case instead of handing None to st_lottie.
        if lottie is not None:
            st_lottie(lottie)

    with col2:
        st.write(_INTRO_MARKDOWN)

    # Generate the Bokeh plot
    # NOTE(review): the chart is placed below both columns here; confirm this
    # matches the intended layout.
    bokeh_plot = create_plot()
    st.bokeh_chart(bokeh_plot, use_container_width=False)
bokeh_plot.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import pandas as pd
3
+ from bokeh.plotting import figure
4
+ from bokeh.models import ColumnDataSource, HoverTool, Div
5
+ from bokeh.layouts import column
6
+ import logging
7
+
8
+ # Configure logging
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
11
+
12
# HTML template for the hover tooltip. Names prefixed with "@" are filled in
# from the scatter data source, so the DataFrame is presumably expected to
# carry id, caption, url and image_b64 columns -- verify against the data prep.
_HOVER_TOOLTIP = """
<div>
<div>
<strong>Image ID:</strong> @id
</div>
<div>
<strong>Cap_ref:</strong> @caption
</div>
<div>
<strong>URL:</strong> @url
</div>
<div>
<img
src="data:image/jpeg;base64,@image_b64" height="200" alt="Image"
style="float: left; margin: 0px 15px 15px 0px;"
border="2"
></img>
</div>
</div>
"""


def _load_pickle(path):
    """Unpickle and return the object stored at *path*."""
    # NOTE(security): unpickling can execute arbitrary code; only point this
    # at data files that ship with the application.
    with open(path, 'rb') as f:
        return pickle.load(f)


def create_plot(data_path='processed_data.pkl', contour_path='contour_data.pkl'):
    """Build the Bokeh figure of the UMAP projection.

    Args:
        data_path: Pickle file holding the point data; it is passed to
            ``ColumnDataSource`` and its ``x`` and ``y`` columns are plotted.
        contour_path: Pickle file holding a mapping with ``xs``, ``ys`` and
            ``color`` entries describing the contour patches.

    Returns:
        A 1280x800 Bokeh figure with filled contour patches, a scatter layer
        and a hover tool attached to the scatter layer.
    """
    # Load preprocessed data
    logger.info("Loading preprocessed data...")
    df = _load_pickle(data_path)
    contour_data = _load_pickle(contour_path)
    logger.info("Data loaded successfully.")

    logger.info("Creating Bokeh plot...")

    p = figure(width=1280, height=800, title="UMAP projection of embeddings")

    # Contour patches are drawn first so the scatter points sit on top of them.
    contour_source = ColumnDataSource(data=dict(xs=contour_data['xs'],
                                                ys=contour_data['ys'],
                                                color=contour_data['color']))
    p.patches(xs="xs", ys="ys", source=contour_source,
              fill_alpha=0.3, line_color=None, fill_color="color")

    # Scatter plot
    source = ColumnDataSource(df)
    scatter_renderer = p.scatter('x', 'y', size=3, source=source,
                                 fill_alpha=0.2, line_alpha=0.1)

    # Restrict hovering to the scatter points so the contour patches do not
    # trigger the image tooltip.
    hover = HoverTool(renderers=[scatter_renderer])
    hover.tooltips = _HOVER_TOOLTIP
    p.add_tools(hover)

    return p
config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [server]
2
+ maxUploadSize = 1024
3
+ maxMessageSize = 500
contour_data.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a992bb94f2a6d982d266b4e9cac4ae601bf1dcb5a0d99c1fa5d02da3d069bfc6
3
+ size 386638
processed_data.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:364112f46defd36ce1c8d3e4e49e48ba2e27e4863ffd65376814ab28611d4fc0
3
+ size 94337630
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
bokeh==2.4.3
pandas
numpy
# Imported directly by app.py; declare it instead of relying on it being
# pulled in by another package.
requests
streamlit
streamlit-lottie