FROM ubuntu:22.04 AS base
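
# Build and runtime environment: ccache for C/C++ rebuilds, the CPU-only PyTorch wheel
# index, and tcmalloc + Intel OpenMP preloaded for better allocator and threading behaviour.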
ENV CCACHE_DIR=/root/.cache/ccache
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
# Cache Hugging Face downloads somewhere the unprivileged runtime user can write.
ENV TRANSFORMERS_CACHE="/home/vllmuser/.cache/huggingface/hub"

# Declare the build argument so it can be passed with --build-arg.
ARG VLLM_CPU_DISABLE_AVX512
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
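
# System toolchain and runtime libraries: gcc-12/g++-12, Python 3.10, NUMA tooling,
# tcmalloc, and the ffmpeg/GL libraries used for image and video inputs.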
RUN --mount=type=cache,target=/var/cache/apt \
    apt-get update -y && \
    apt-get install -y \
        curl \
        ccache \
        git \
        wget \
        vim \
        numactl \
        gcc-12 \
        g++-12 \
        python3 \
        python3-pip \
        libtcmalloc-minimal4 \
        libnuma-dev \
        ffmpeg \
        libsm6 \
        libxext6 \
        libgl1 \
        cmake && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 && \
    rm -rf /var/lib/apt/lists/*
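
# intel-openmp provides the libiomp5.so referenced by LD_PRELOAD above.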
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install intel-openmp
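
# Build and run as an unprivileged user. The fixed UID lets the cache mounts below be
# made writable for this user, and user-level pip installs land in ~/.local.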
RUN useradd -ms /bin/bash -u 1000 vllmuser
USER vllmuser
WORKDIR /home/vllmuser

# Console scripts from user-level pip installs (including the vllm CLI) live here.
ENV PATH="/home/vllmuser/.local/bin:${PATH}"
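
# Prebuilt Intel Extension for PyTorch CPU wheel (CPython 3.10, x86_64).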
RUN pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.4.0%2Bgitfbaa4bc-cp310-cp310-linux_x86_64.whl
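
# Fetch the vLLM source tree and work from it.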
RUN git clone https://github.com/vllm-project/vllm.git

WORKDIR /home/vllmuser/vllm
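
# Requirement manifests come from the build context so the dependency layers below
# cache independently of source changes.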
COPY requirements-build.txt requirements-build.txt
COPY requirements-common.txt requirements-common.txt
COPY requirements-cpu.txt requirements-cpu.txt
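
# Python build-time dependencies. The pip cache is mounted at the user's cache path
# (uid/gid 1000 matches vllmuser) so it is actually reused across builds.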
RUN --mount=type=cache,target=/home/vllmuser/.cache/pip,uid=1000,gid=1000 \
    pip install --upgrade pip && \
    pip install -r requirements-build.txt
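
# oneDNN (rls-v3.5 branch) sources for the CPU backend.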
RUN git clone -b rls-v3.5 https://github.com/oneapi-src/oneDNN.git
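
# Configure a minimal oneDNN: static library, inference workloads, matmul primitive only.
# Installing to the default /usr/local prefix needs root, so switch users just for this step.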
USER root
RUN --mount=type=cache,target=/root/.cache/ccache \
    cmake -B ./oneDNN/build -S ./oneDNN -G "Unix Makefiles" \
        -DONEDNN_LIBRARY_TYPE=STATIC \
        -DONEDNN_BUILD_DOC=OFF \
        -DONEDNN_BUILD_EXAMPLES=OFF \
        -DONEDNN_BUILD_TESTS=OFF \
        -DONEDNN_BUILD_GRAPH=OFF \
        -DONEDNN_ENABLE_WORKLOAD=INFERENCE \
        -DONEDNN_ENABLE_PRIMITIVE=MATMUL && \
    cmake --build ./oneDNN/build --target install --config Release
USER vllmuser
WORKDIR /home/vllmuser/vllm
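
# Runtime Python dependencies for the CPU backend (CPU torch wheels come from the
# extra index configured above).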
RUN --mount=type=cache,target=/home/vllmuser/.cache/pip,uid=1000,gid=1000 \
    pip install -r requirements-cpu.txt
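
# Copy the full source tree, owned by the build user so setup.py can write build artifacts.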
COPY --chown=vllmuser:vllmuser ./ /home/vllmuser/vllm/
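
# Aggressive optimization flags. Note that -march=native ties the binaries to the CPU
# of the build host, so the image may not run on older machines.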
ENV CFLAGS="-O3 -march=native -ffast-math -fopenmp"
ENV CXXFLAGS="-O3 -march=native -ffast-math -fopenmp"
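
# Build and install the vLLM wheel for the CPU backend. The .git directory is bind-mounted
# so the build can read repository/version metadata; the ccache and pip caches are mounted
# at user-writable paths (uid/gid 1000 matches vllmuser).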
RUN --mount=type=cache,target=/home/vllmuser/.cache/pip,uid=1000,gid=1000 \
    --mount=type=cache,target=/home/vllmuser/.cache/ccache,uid=1000,gid=1000 \
    --mount=type=bind,source=.git,target=.git \
    CCACHE_DIR=/home/vllmuser/.cache/ccache VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
    pip install dist/*.whl && \
    rm -rf dist
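
# Convenience links so tests, examples, and benchmarks are reachable from the home directory.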
RUN ln -sfn /home/vllmuser/vllm/tests /home/vllmuser/tests && \
    ln -sfn /home/vllmuser/vllm/examples /home/vllmuser/examples && \
    ln -sfn /home/vllmuser/vllm/benchmarks /home/vllmuser/benchmarks
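
# Ray (with the Serve extra) for distributed serving backends.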
RUN pip install "ray[serve]"
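
# 8000 is the vLLM API server's default port; 7860 is the port used by the CMD below.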
EXPOSE 8000 7860
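
# Default command: serve a small multimodal model on port 7860 (exec form so the
# server receives stop signals directly).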
CMD ["vllm", "serve", "llava-hf/llava-interleave-qwen-0.5b-hf", "--port", "7860"]