# Triton Inference Server image extended with:
#   * the TensorRT OSS plugin library, built from source,
#   * SpaceLLaVA GGUF weights under /weights,
#   * PyTorch (CUDA 11.8 wheels) and llama-cpp-python built with cuBLAS,
#   * the local ./models directory as the Triton model store at /models.
FROM nvcr.io/nvidia/tritonserver:22.11-py3

WORKDIR /workspace

# cmake is needed for the TensorRT OSS build below.
# Recommended packages are intentionally left enabled: the compiler/make
# toolchain may be pulled in that way (NOTE(review): confirm against the base
# image before adding --no-install-recommends).
# The apt package index is removed in the same layer so it does not persist.
RUN apt-get update \
    && apt-get install -y cmake \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): tensorrt is unpinned, so the installed version follows the
# index at build time; pin it once a known-good version is chosen.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --upgrade tensorrt

# Location of the TensorRT OSS checkout; also used to derive PLUGIN_LIBS.
ENV TRT_OSSPATH=/workspace/TensorRT

# Branch or tag of NVIDIA/TensorRT to build. Defaults to "main"; override with
# --build-arg TRT_OSS_REF=<tag> for a reproducible build.
ARG TRT_OSS_REF=main

# Shallow clone of the selected ref, then fetch all nested submodules.
RUN git clone https://github.com/NVIDIA/TensorRT.git \
        -b "${TRT_OSS_REF}" --single-branch --depth 1 "${TRT_OSSPATH}" \
    && git -C "${TRT_OSSPATH}" submodule update --init --recursive

# Configure from <checkout>/build (WORKDIR creates the directory) and build
# only the plugin subdirectory. TRT_OUT_DIR resolves to <checkout>/build/out.
WORKDIR ${TRT_OSSPATH}/build
RUN cmake .. -DTRT_OUT_DIR="${PWD}/out" \
    && make -C plugin -j"$(nproc)"

# Path where the plugin library is expected after the build above.
ENV PLUGIN_LIBS="${TRT_OSSPATH}/build/out/libnvinfer_plugin.so"

# SpaceLLaVA GGUF files (quantized model + multimodal projector).
# Kept as separate layers so each large download is cached independently.
# NOTE(review): downloads are not checksum-verified and track the "main"
# revision of the repository.
WORKDIR /weights
RUN wget https://huggingface.co/remyxai/SpaceLLaVA/resolve/main/ggml-model-q4_0.gguf
RUN wget https://huggingface.co/remyxai/SpaceLLaVA/resolve/main/mmproj-model-f16.gguf

# PyTorch stack from the CUDA 11.8 wheel index.
RUN python3 -m pip install --no-cache-dir \
        torch==2.0.1 \
        torchvision==0.15.2 \
        torchaudio==2.0.2 \
        --index-url https://download.pytorch.org/whl/cu118

# llama-cpp-python compiled from source with cuBLAS enabled via CMAKE_ARGS.
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" \
    pip install llama-cpp-python==0.2.45 --force-reinstall --no-cache-dir

# Triton model repository copied from the build context.
COPY ./models/ /models/

# NOTE(review): no USER is set, so the container runs as the base image's
# default user; consider switching to a non-root user if one is available.
WORKDIR /workspace

CMD ["tritonserver", "--model-store=/models"]