Update app.py
app.py CHANGED
```diff
@@ -9,11 +9,16 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 # pip install 'git+https://github.com/huggingface/transformers.git'
 
 from transformers import AutoTokenizer, AutoModelForCausalLM
+
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+
+bnb_config = BitsAndBytesConfig(load_in_8bit=True)
+
 token=os.getenv('token')
 print('token = ',token)
 model_id = "CohereForAI/c4ai-command-r-plus"
 tokenizer = AutoTokenizer.from_pretrained(model_id, token= token)
-model = AutoModelForCausalLM.from_pretrained(model_id, token= token)
+model = AutoModelForCausalLM.from_pretrained(model_id, token= token, quantization_config=bnb_config)
 
 # Format message with the command-r-plus chat template
 messages = [{"role": "user", "content": "Hello, how are you?"}]
```
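The net effect of the commit: the model weights are now loaded in 8-bit via bitsandbytes, which roughly halves GPU memory compared to fp16. For reference, here is a minimal runnable sketch of the resulting script. The duplicated `transformers` import is collapsed into one line; the `import os` the script relies on, the `device_map="auto"` placement, and the final generation step are assumptions following the standard transformers API, not part of the diff itself:

```python
import os

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Load the weights in 8-bit to cut GPU memory roughly in half vs. fp16.
# Requires the bitsandbytes and accelerate packages to be installed.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

token = os.getenv("token")  # Hugging Face access token from the environment
model_id = "CohereForAI/c4ai-command-r-plus"

tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=token,
    quantization_config=bnb_config,
    device_map="auto",  # assumption: spread the quantized weights across available devices
)

# Format message with the command-r-plus chat template
messages = [{"role": "user", "content": "Hello, how are you?"}]
input_ids = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Assumed generation step to show the loaded model in use; the diff stops at `messages`.
output = model.generate(input_ids, max_new_tokens=100, do_sample=True, temperature=0.3)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```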