Spaces:
Paused
Paused
vllm
Browse files- Dockerfile +31 -37
- entrypoint.sh +11 -0
Dockerfile
CHANGED
@@ -1,47 +1,41 @@
|
|
1 |
-
|
2 |
-
FROM nvidia/cuda:${CUDA_IMAGE}
|
3 |
-
|
4 |
-
# We need to set the host to 0.0.0.0 to allow outside access
|
5 |
-
ENV HOST 0.0.0.0
|
6 |
-
|
7 |
-
RUN apt-get update && apt-get upgrade -y \
|
8 |
-
&& apt-get install -y git build-essential \
|
9 |
-
python3 python3-pip gcc wget \
|
10 |
-
ocl-icd-opencl-dev opencl-headers clinfo \
|
11 |
-
libclblast-dev libopenblas-dev \
|
12 |
-
&& mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
|
13 |
-
|
14 |
-
COPY . .
|
15 |
-
|
16 |
-
# setting build related env vars
|
17 |
-
ENV CUDA_DOCKER_ARCH=all
|
18 |
-
ENV LLAMA_CUBLAS=1
|
19 |
-
|
20 |
-
# Install dependencies
|
21 |
-
RUN python3 -m pip install --upgrade pip pytest cmake \
|
22 |
-
scikit-build setuptools fastapi uvicorn sse-starlette \
|
23 |
-
pydantic-settings huggingface_hub hf_transfer
|
24 |
-
|
25 |
-
# Install llama-cpp-python (build with cuda)
|
26 |
-
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
|
27 |
|
|
|
28 |
RUN useradd -m -u 1000 user
|
|
|
29 |
# Switch to the "user" user
|
30 |
USER user
|
|
|
31 |
# Set home to the user's home directory
|
32 |
ENV HOME=/home/user \
|
33 |
-
PATH=/home/user/.local/bin:$PATH
|
34 |
-
PYTHONPATH=$HOME/app \
|
35 |
-
PYTHONUNBUFFERED=1 \
|
36 |
-
GRADIO_ALLOW_FLAGGING=never \
|
37 |
-
GRADIO_NUM_PORTS=1 \
|
38 |
-
GRADIO_SERVER_NAME=0.0.0.0 \
|
39 |
-
GRADIO_THEME=huggingface \
|
40 |
-
SYSTEM=spaces
|
41 |
|
|
|
42 |
WORKDIR $HOME/app
|
43 |
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
-
|
|
|
# syntax=docker/dockerfile:1
# vLLM OpenAI-compatible inference server image for a Hugging Face Space.
# Serves on port 7860 via entrypoint.sh.
FROM --platform=amd64 nvcr.io/nvidia/cuda:12.1.0-devel-ubuntu22.04 AS base

# Install OS-level build dependencies while still root — apt requires root,
# so this must happen BEFORE the switch to the unprivileged user below.
# update+install in one layer (stale-cache bug otherwise); clean the lists
# in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        ninja-build \
        python3-packaging \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Set up a new user named "user" with user ID 1000 and drop root privileges.
RUN useradd -m -u 1000 user
USER user

# Set home to the user's home directory; ~/.local/bin is where
# `pip install` as this user places console scripts.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

RUN pip3 install --no-cache-dir -U pip

# Tweak this list to reduce build time
# https://developer.nvidia.com/cuda-gpus
ENV TORCH_CUDA_ARCH_LIST="7.0;7.2;7.5;8.0;8.6;8.9;9.0"

RUN pip3 install --no-cache-dir "torch==2.1.1"

# This build is slow but NVIDIA does not provide binaries. Increase MAX_JOBS as needed.
RUN pip3 install --no-cache-dir "git+https://github.com/stanford-futuredata/megablocks.git"
RUN pip3 install --no-cache-dir "git+https://github.com/vllm-project/vllm.git"
RUN pip3 install --no-cache-dir "xformers==0.0.23" "transformers==4.36.0" "fschat[model_worker]==0.2.34"

# Build NVIDIA Apex from a pinned commit; the sed removes the strict
# CUDA-version check so the build proceeds against the container's toolkit.
RUN git clone https://github.com/NVIDIA/apex && \
    cd apex && git checkout 2386a912164b0c5cfcd8be7a2b890fbac5607c82 && \
    sed -i '/check_cuda_torch_binary_vs_bare_metal(CUDA_HOME)/d' setup.py && \
    python3 setup.py install --cpp_ext --cuda_ext

# COPY runs as root regardless of USER, so set ownership and the execute bit
# here — a later `RUN chmod` by the unprivileged user would fail on a
# root-owned file.
COPY --chown=user:user --chmod=755 entrypoint.sh .

# Hugging Face Spaces route HTTP traffic to port 7860 (documentation only).
EXPOSE 7860

# Exec-form ENTRYPOINT performs NO variable expansion, so "$HOME/..." would
# be taken literally; use the resolved absolute path.
ENTRYPOINT ["/home/user/app/entrypoint.sh"]
entrypoint.sh
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Entrypoint for the vLLM Space container: optionally log in to the
# Hugging Face Hub (when HF_TOKEN is set), then start the vLLM
# OpenAI-compatible API server for $HF_MODEL on port 7860.
set -euo pipefail

if [[ -n "${HF_TOKEN:-}" ]]; then
    echo "The HF_TOKEN environment variable set, logging to Hugging Face."
    # Read the token from the environment INSIDE Python rather than
    # interpolating it into the -c source string: interpolation breaks on
    # quote characters and allows code injection via the token value.
    python3 -c "import os, huggingface_hub; huggingface_hub.login(os.environ['HF_TOKEN'])"
else
    echo "The HF_TOKEN environment variable is not set or empty, not logging to Hugging Face."
fi

# Run the provided command. The OpenAI api_server takes the model via the
# --model flag (a bare positional argument is rejected by its argparse CLI).
# `exec` replaces the shell so the server is PID 1 and receives SIGTERM
# from `docker stop`; with `set -u`, an unset HF_MODEL fails fast here.
exec python3 -u -m vllm.entrypoints.openai.api_server --model "${HF_MODEL}" --host "0.0.0.0" --port 7860