FROM ubuntu:22.04 AS base
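
# Build and runtime environment: ccache for C/C++ rebuilds, the CPU-only PyTorch wheel
# index, and tcmalloc + Intel OpenMP preloaded for better allocator and threading behaviour.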
ENV CCACHE_DIR=/root/.cache/ccache
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
# Cache Hugging Face downloads somewhere the unprivileged runtime user can write.
ENV TRANSFORMERS_CACHE="/home/vllmuser/.cache/huggingface/hub"

# Declare the build argument so it can be passed with --build-arg.
ARG VLLM_CPU_DISABLE_AVX512
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
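
# System toolchain and runtime libraries: gcc-12/g++-12, Python 3.10, NUMA tooling,
# tcmalloc, and the ffmpeg/GL libraries used for image and video inputs.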
RUN --mount=type=cache,target=/var/cache/apt \
    apt-get update -y && \
    apt-get install -y \
        curl \
        ccache \
        git \
        wget \
        vim \
        numactl \
        gcc-12 \
        g++-12 \
        python3 \
        python3-pip \
        libtcmalloc-minimal4 \
        libnuma-dev \
        ffmpeg \
        libsm6 \
        libxext6 \
        libgl1 \
        cmake && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 && \
    rm -rf /var/lib/apt/lists/*
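
# intel-openmp provides the libiomp5.so referenced by LD_PRELOAD above.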
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install intel-openmp
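
# Build and run as an unprivileged user. The fixed UID lets the cache mounts below be
# made writable for this user, and user-level pip installs land in ~/.local.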
RUN useradd -ms /bin/bash -u 1000 vllmuser
USER vllmuser
WORKDIR /home/vllmuser

# Console scripts from user-level pip installs (including the vllm CLI) live here.
ENV PATH="/home/vllmuser/.local/bin:${PATH}"
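
# Prebuilt Intel Extension for PyTorch CPU wheel (CPython 3.10, x86_64).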
RUN pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.4.0%2Bgitfbaa4bc-cp310-cp310-linux_x86_64.whl
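
# Fetch the vLLM source tree and work from it.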
RUN git clone https://github.com/vllm-project/vllm.git

WORKDIR /home/vllmuser/vllm
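
# Requirement manifests come from the build context so the dependency layers below
# cache independently of source changes.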
COPY requirements-build.txt requirements-build.txt
COPY requirements-common.txt requirements-common.txt
COPY requirements-cpu.txt requirements-cpu.txt
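
# Python build-time dependencies. The pip cache is mounted at the user's cache path
# (uid/gid 1000 matches vllmuser) so it is actually reused across builds.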
RUN --mount=type=cache,target=/home/vllmuser/.cache/pip,uid=1000,gid=1000 \
    pip install --upgrade pip && \
    pip install -r requirements-build.txt
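
# oneDNN (rls-v3.5 branch) sources for the CPU backend.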
RUN git clone -b rls-v3.5 https://github.com/oneapi-src/oneDNN.git
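
# Configure a minimal oneDNN: static library, inference workloads, matmul primitive only.
# Installing to the default /usr/local prefix needs root, so switch users just for this step.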
USER root
RUN --mount=type=cache,target=/root/.cache/ccache \
    cmake -B ./oneDNN/build -S ./oneDNN -G "Unix Makefiles" \
        -DONEDNN_LIBRARY_TYPE=STATIC \
        -DONEDNN_BUILD_DOC=OFF \
        -DONEDNN_BUILD_EXAMPLES=OFF \
        -DONEDNN_BUILD_TESTS=OFF \
        -DONEDNN_BUILD_GRAPH=OFF \
        -DONEDNN_ENABLE_WORKLOAD=INFERENCE \
        -DONEDNN_ENABLE_PRIMITIVE=MATMUL && \
    cmake --build ./oneDNN/build --target install --config Release
USER vllmuser
WORKDIR /home/vllmuser/vllm
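
# Runtime Python dependencies for the CPU backend (CPU torch wheels come from the
# extra index configured above).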
RUN --mount=type=cache,target=/home/vllmuser/.cache/pip,uid=1000,gid=1000 \
    pip install -r requirements-cpu.txt
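
# Copy the full source tree, owned by the build user so setup.py can write build artifacts.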
COPY --chown=vllmuser:vllmuser ./ /home/vllmuser/vllm/
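
# Aggressive optimization flags. Note that -march=native ties the binaries to the CPU
# of the build host, so the image may not run on older machines.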
ENV CFLAGS="-O3 -march=native -ffast-math -fopenmp"
ENV CXXFLAGS="-O3 -march=native -ffast-math -fopenmp"
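
# Build and install the vLLM wheel for the CPU backend. The .git directory is bind-mounted
# so the build can read repository/version metadata; the ccache and pip caches are mounted
# at user-writable paths (uid/gid 1000 matches vllmuser).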
RUN --mount=type=cache,target=/home/vllmuser/.cache/pip,uid=1000,gid=1000 \
    --mount=type=cache,target=/home/vllmuser/.cache/ccache,uid=1000,gid=1000 \
    --mount=type=bind,source=.git,target=.git \
    CCACHE_DIR=/home/vllmuser/.cache/ccache VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
    pip install dist/*.whl && \
    rm -rf dist
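
# Convenience links so tests, examples, and benchmarks are reachable from the home directory.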
RUN ln -sfn /home/vllmuser/vllm/tests /home/vllmuser/tests && \
    ln -sfn /home/vllmuser/vllm/examples /home/vllmuser/examples && \
    ln -sfn /home/vllmuser/vllm/benchmarks /home/vllmuser/benchmarks
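
# Ray (with the Serve extra) for distributed serving backends.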
RUN pip install "ray[serve]"
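
# 8000 is the vLLM API server's default port; 7860 is the port used by the CMD below.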
EXPOSE 8000 7860
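
# Default command: serve a small multimodal model on port 7860 (exec form so the
# server receives stop signals directly).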
CMD ["vllm", "serve", "llava-hf/llava-interleave-qwen-0.5b-hf", "--port", "7860"]