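# syntax=docker/dockerfile:1
# Build vLLM for CPU on Ubuntu 22.04 with IPEX and a static oneDNN, then serve
# a demo model. The RUN cache mounts require BuildKit. Example usage (the image
# tag is illustrative):
#   docker build --build-arg VLLM_CPU_DISABLE_AVX512=false -t vllm-cpu .
#   docker run --rm -p 7860:7860 vllm-cpu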
FROM ubuntu:22.04 AS base
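# Build/runtime environment: ccache for C/C++ compiles, the CPU-only PyTorch
# wheel index, tcmalloc + Intel OpenMP preloaded at runtime, and the HF cache.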
ENV CCACHE_DIR=/home/vllmuser/.cache/ccache
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
ENV TRANSFORMERS_CACHE="/root/.cache/huggingface/hub"
ARG VLLM_CPU_DISABLE_AVX512="false"
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
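# System toolchain and runtime dependencies; gcc/g++ 12 become the defaults.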
RUN --mount=type=cache,target=/var/cache/apt \
apt-get update -y && \
apt-get install -y \
curl \
ccache \
git \
wget \
vim \
numactl \
gcc-12 \
g++-12 \
python3 \
python3-pip \
libtcmalloc-minimal4 \
libnuma-dev \
ffmpeg \
libsm6 \
libxext6 \
libgl1 \
cmake && \
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 && \
rm -rf /var/lib/apt/lists/*
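# intel-openmp supplies the libiomp5.so that LD_PRELOAD above expects under /usr/local/lib.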
RUN --mount=type=cache,target=/root/.cache/pip \
pip install intel-openmp
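# Create an unprivileged user; pip installs below land in its user site-packages.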
RUN useradd -m -s /bin/bash -u 1000 vllmuser
USER vllmuser
ENV PATH="/home/vllmuser/.local/bin:${PATH}"
WORKDIR /home/vllmuser
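# Pinned Intel Extension for PyTorch (IPEX) dev wheel for CPU, Python 3.10.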
RUN pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.4.0%2Bgitfbaa4bc-cp310-cp310-linux_x86_64.whl
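# Clone vLLM; the build context is copied over this checkout further below.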
RUN git clone https://github.com/vllm-project/vllm.git
WORKDIR /home/vllmuser/vllm
COPY requirements-build.txt requirements-common.txt requirements-cpu.txt ./
RUN --mount=type=cache,target=/home/vllmuser/.cache/pip,uid=1000,gid=1000 \
pip install --upgrade pip && \
pip install -r requirements-build.txt
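# Build a static, inference-only oneDNN with just the matmul primitive enabled.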
RUN git clone -b rls-v3.5 https://github.com/oneapi-src/oneDNN.git

# The install target writes to /usr/local, so run this step as root.
USER root
RUN --mount=type=cache,target=/home/vllmuser/.cache/ccache,uid=1000,gid=1000 \
cmake -B ./oneDNN/build -S ./oneDNN -G "Unix Makefiles" \
-DONEDNN_LIBRARY_TYPE=STATIC \
-DONEDNN_BUILD_DOC=OFF \
-DONEDNN_BUILD_EXAMPLES=OFF \
-DONEDNN_BUILD_TESTS=OFF \
-DONEDNN_BUILD_GRAPH=OFF \
-DONEDNN_ENABLE_WORKLOAD=INFERENCE \
-DONEDNN_ENABLE_PRIMITIVE=MATMUL && \
cmake --build ./oneDNN/build --target install --config Release

# Return to the unprivileged user for the remaining steps.
USER vllmuser
WORKDIR /home/vllmuser/vllm
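# CPU-specific Python dependencies (torch comes from the CPU wheel index above).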
RUN --mount=type=cache,target=/home/vllmuser/.cache/pip,uid=1000,gid=1000 \
pip install -r requirements-cpu.txt
COPY --chown=vllmuser:vllmuser ./ /home/vllmuser/vllm/
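# NOTE: -march=native ties the binaries to the build host's CPU features, and
# -ffast-math trades IEEE-754 strictness for speed.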
ENV CFLAGS="-O3 -march=native -ffast-math -fopenmp"
ENV CXXFLAGS="-O3 -march=native -ffast-math -fopenmp"
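# Build and install the vLLM CPU wheel; .git is bind-mounted so the build can
# read repository metadata (e.g., for version detection).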
RUN --mount=type=cache,target=/home/vllmuser/.cache/pip,uid=1000,gid=1000 \
--mount=type=cache,target=/home/vllmuser/.cache/ccache,uid=1000,gid=1000 \
--mount=type=bind,source=.git,target=.git \
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
pip install dist/*.whl && \
rm -rf dist
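# Link tests/, examples/, and benchmarks/ into the home directory when present.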
RUN for d in tests examples benchmarks; do \
if [ -d "/home/vllmuser/vllm/$d" ]; then ln -sfn "/home/vllmuser/vllm/$d" "/home/vllmuser/$d"; fi; \
done
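# Ray (with the Serve extras) enables distributed execution and serving backends.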
RUN pip install "ray[serve]"
EXPOSE 8000 7860
CMD ["bash", "-c", "vllm serve llava-hf/llava-interleave-qwen-0.5b-hf --port 7860 & wait"]