FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive

# System packages (the commented entries are optional Python build dependencies, kept for reference).
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends ca-certificates \
    git \
    git-lfs \
    wget \
    curl \
    # python build dependencies \
    # build-essential \
    # libssl-dev \
    # zlib1g-dev \
    # libbz2-dev \
    # libreadline-dev \
    # libsqlite3-dev \
    # libncursesw5-dev \
    # xz-utils \
    # tk-dev \
    # libxml2-dev \
    # libxmlsec1-dev \
    # libffi-dev \
    # liblzma-dev \
    nvidia-driver-550 \
    python3.10 \
    python3.10-venv \
    python3-pip \
    python-is-python3
    # ffmpeg
    # software-properties-common

# RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
# RUN dpkg -i cuda-keyring_1.1-1_all.deb
# RUN apt-get update
# RUN apt-get -y install cuda-toolkit-12-4

# Install the Ollama binary.
RUN curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
RUN chmod +x /usr/bin/ollama

# Run the app as a non-root user.
ENV USER='user'
RUN useradd -m -u 1000 ${USER}
USER ${USER}
ENV HOME=/home/${USER} \
    PATH=${HOME}/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} \
    APPDIR=${HOME}/app

WORKDIR ${APPDIR}
COPY --chown=1000 . ${APPDIR}

# ENV NVIDIA_VISIBLE_DEVICES=all

# RUN curl https://pyenv.run | bash
# ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
# ARG PYTHON_VERSION=3.10.13
# RUN pyenv install ${PYTHON_VERSION} && \
#     pyenv global ${PYTHON_VERSION} && \
#     pyenv rehash

# RUN python3.10 -m venv .venv
# RUN . .venv/bin/activate

ENV PYTHONPATH=${HOME}/.local/bin:${PYTHONPATH}

# Python dependencies for the Gradio app.
RUN python -m pip install --no-cache-dir -U pip setuptools wheel
RUN python -m pip install \
    "huggingface-hub" \
    "hf-transfer" \
    "gradio[oauth]>=4.28.0" \
    "gradio_huggingfacehub_search==0.0.7" \
    "APScheduler"

# RUN deactivate

# RUN go install golang.org/x/tools/gopls@latest
# RUN git clone https://github.com/ollama/ollama
# WORKDIR ${APPDIR}/ollama
# RUN OLLAMA_CUSTOM_CPU_DEFS="-DGGML_AVX=on -DGGML_AVX2=on -DGGML_F16C=on -DGGML_FMA=on" go generate ./... --verbose \
#     go build . --verbose \
#     go install . --verbose

# Build llama.cpp's quantization tool with CUDA enabled.
RUN git clone https://github.com/ggerganov/llama.cpp
COPY groups_merged.txt llama.cpp/.
WORKDIR ${APPDIR}/llama.cpp
RUN git pull
RUN python -m pip install -r requirements.txt
RUN GGML_CUDA=1 LLAMA_CUDA=1 make -j llama-quantize

ENV PYTHONUNBUFFERED=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    TQDM_POSITION=-1 \
    TQDM_MININTERVAL=1 \
    SYSTEM=spaces \
    LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    NVIDIA_VISIBLE_DEVICES=all \
    OLLAMA_HOST=0.0.0.0

WORKDIR ${APPDIR}

# EXPOSE 11434

# RUN . .venv/bin/activate

# Ollama is started in the entrypoint rather than in a RUN instruction: a RUN only
# executes at build time, so a server started there would not be running in the
# final container.
ENTRYPOINT ollama serve & sleep 5 && python app.py
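
# Example: building and running this image locally (the image name and port mappings
# are illustrative; on Hugging Face Spaces the platform builds and runs the image itself):
#   docker build -t gguf-quantize .
#   docker run --gpus all -p 7860:7860 -p 11434:11434 gguf-quantize
# 7860 is Gradio's default server port and 11434 is Ollama's default API port.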