pip install huggingface-hub llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121 -C cmake.args="-DLLAMA_CUDA=ON"
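# Optional smoke test, a minimal sketch assuming a CUDA-capable GPU; the repo id and
# filename below are placeholders for whatever GGUF model you actually want to run.
# It downloads a quantized GGUF checkpoint with huggingface-cli, then loads it with
# llama-cpp-python, offloading all layers to the GPU (n_gpu_layers=-1).
huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q4_K_M.gguf --local-dir ./models
python -c "from llama_cpp import Llama; llm = Llama('./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf', n_gpu_layers=-1); print(llm('Q: What is 2+2? A:', max_tokens=8)['choices'][0]['text'])"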