FlawedLLM committed
Commit • 41aa4c4 • 1 Parent(s): b4bedb5
Update app.py
app.py CHANGED

@@ -43,10 +43,16 @@ import torch
 # )
 # tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini")
 # Load model directly
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_9")
-
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float16)
+torch_dtype = torch.float16
+model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_9", device_map='auto', torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, quantization_config=quantization_config)
 
 @spaces.GPU(duration=300)
 def chunk_it(input_command):
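
The change loads FlawedLLM/Bhashini_9 with a bitsandbytes 4-bit NF4 quantization config (double quantization, float16 compute), a common way to fit a large checkpoint into a single GPU's memory. A minimal sketch of how the quantized model and tokenizer from this diff might be exercised; the prompt text and generation settings here are illustrative assumptions, not part of the commit:

    # Sketch only: assumes the `tokenizer` and `model` objects created in app.py above.
    # The prompt and generation parameters are hypothetical, not from the commit.
    input_command = "example instruction"                   # placeholder input
    inputs = tokenizer(input_command, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=128)  # assumed generation budget
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Because device_map='auto' places the quantized weights automatically, only the inputs need to be moved to model.device before calling generate.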