#!/usr/bin/bash
#
# Interactive launcher for koboldcpp.
#
# Optionally pulls and rebuilds koboldcpp, then prompts for a model, the number
# of GPU layers to offload and a context size, and finally starts the server.
#
# Requirements:
#   - gum: https://github.com/charmbracelet/gum#installation
#   - conda, with an environment named "kobold"
#   - koboldcpp built from source (the build flags below assume an NVIDIA GPU)
#
# If you get a conda error try this: `conda install conda-forge::libstdcxx-ng`
#
# NOTE: failures are checked explicitly instead of enabling `set -eu`, because
# the conda hook below is eval'd into this shell and is not under our control.

# Prints an error message to stderr and aborts the script.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

command -v gum >/dev/null || die "gum is required: https://github.com/charmbracelet/gum#installation"
command -v conda >/dev/null || die "conda is required but was not found in PATH"

# Activates the conda env named kobold.
eval "$(conda shell.bash hook)"
conda activate kobold || die "could not activate the conda env 'kobold'"

# USER variables
# CHANGE
KOBOLD_DIR=~/Git/koboldcpp/
MODEL_FOLDER_DIR=~/Downloads/GGUF
API_PORT=8001
API_HOST=192.168.1.20
BLAS_THREADS=16
THREADS=8

gum style \
  --foreground 212 --border-foreground 212 --border double \
  --align center --width 100 --margin "1 2" --padding "2 4" --bold \
  'Beep...Boop... Script created by Virt-io'

# Refer to https://github.com/LostRuins/koboldcpp?tab=readme-ov-file#osx-and-linux-manual-compiling
# for build dependencies.

# Everything below (git, make, koboldcpp.py) must run inside the koboldcpp dir.
cd "$KOBOLD_DIR" || die "cannot cd to $KOBOLD_DIR"

# Edit the make flags to fit your needs.
MAKE_FLAGS=(
  LLAMA_NO_LLAMAFILE=0
  LLAMA_CUDA=1
  LLAMA_CUBLAS=1
  LLAMA_OPENBLAS=1
  LLAMA_FAST=1
  LLAMA_NO_CCACHE=1
  LLAMA_CUDA_MMV_Y=2
  LLAMA_CUDA_DMMV_X=64
  LLAMA_CUDA_DMMV_F16=1
  LLAMA_CUDA_F16=1
  LLAMA_NATIVE=1
  LLAMA_LTO=1
  LLAMA_AVX=1
  LLAMA_AVX2=1
  LLAMA_FMA=1
  LLAMA_F16C=1
)

echo "Git pull & build?"
KOBOLD_BUILD=$(gum choose --selected.bold --selected.underline "NO" "YES")

if [[ "$KOBOLD_BUILD" == "YES" ]]; then
  # Only clean when the pull succeeded; a failed pull (e.g. offline) still
  # allows an incremental rebuild of the current checkout.
  if gum spin --spinner monkey --title "Pulling latest changes..." -- git pull; then
    make clean
  else
    printf 'Warning: git pull failed, rebuilding the current checkout.\n' >&2
  fi
  gum spin --show-output --spinner monkey --title "Re-building Koboldcpp..." -- \
    make -j8 "${MAKE_FLAGS[@]}" \
    || die "koboldcpp build failed"
  echo "Rebuild completed"
else
  echo "Skipping rebuild."
fi

# Lists models. A glob is used instead of parsing `ls` so that names containing
# spaces or glob characters stay intact.
shopt -s nullglob
MODEL_PATHS=("$MODEL_FOLDER_DIR"/*)
shopt -u nullglob
((${#MODEL_PATHS[@]} > 0)) || die "no models found in $MODEL_FOLDER_DIR"
# Show only the entry names, not the full paths.
MODEL_NAMES=("${MODEL_PATHS[@]##*/}")

echo "Select Model"
MODEL=$(gum choose --height=30 --selected.bold --selected.underline "${MODEL_NAMES[@]}") \
  || die "model selection cancelled"
[[ -n "$MODEL" ]] || die "no model selected"

# Uncomment if you use sharded models. It will take the first file in a dir and load it.
#SHARDED_MODEL=$(ls -p "$MODEL_FOLDER_DIR/$MODEL" | grep -v / | head -1)
#MODEL=$MODEL/$SHARDED_MODEL

echo "$MODEL has been selected"

echo "Layers to Offload"
LAYERS=$(gum input --placeholder "99") || die "layer input cancelled"
# The placeholder is only a hint; an empty answer falls back to the value shown.
LAYERS=${LAYERS:-99}
[[ "$LAYERS" =~ ^-?[0-9]+$ ]] || die "layers must be an integer, got '$LAYERS'"
echo "$LAYERS layers have been offloaded"

echo "Context Size"
CONTEXT=$(gum choose --height=10 --selected.bold --selected.underline \
  "4096" "8192" "12288" "16384" "32768") \
  || die "context size selection cancelled"
[[ -n "$CONTEXT" ]] || die "no context size selected"
echo "Using a context size of $CONTEXT"

# Combined user flags, kept as an array so a model path with spaces is passed
# to koboldcpp as a single argument.
USER_FLAGS=(
  --host "$API_HOST"
  --port "$API_PORT"
  --flashattention
  --blasbatchsize 2048
  --threads "$THREADS"
  --blasthreads "$BLAS_THREADS"
  --usecublas normal
  --contextsize "$CONTEXT"
  --gpulayers "$LAYERS"
  --model "$MODEL_FOLDER_DIR/$MODEL"
)

# Runs koboldcpp.
python koboldcpp.py --skiplauncher "${USER_FLAGS[@]}"