FlawedLLM committed
Commit • 41aa4c4 • 1 Parent(s): b4bedb5
Update app.py
app.py CHANGED

@@ -43,10 +43,16 @@ import torch
 # )
 # tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini")
 # Load model directly
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_9")
-
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float16)
+torch_dtype = torch.float16
+model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_9", device_map='auto', torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, quantization_config=quantization_config)
 
 @spaces.GPU(duration=300)
 def chunk_it(input_command):
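
The change loads FlawedLLM/Bhashini_9 with a bitsandbytes 4-bit NF4 quantization config (double quantization, float16 compute), a common way to fit a large checkpoint into a single GPU's memory. A minimal sketch of how the quantized model and tokenizer from this diff might be exercised; the prompt text and generation settings here are illustrative assumptions, not part of the commit:

    # Sketch only: assumes the `tokenizer` and `model` objects created in app.py above.
    # The prompt and generation parameters are hypothetical, not from the commit.
    input_command = "example instruction"                   # placeholder input
    inputs = tokenizer(input_command, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=128)  # assumed generation budget
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Because device_map='auto' places the quantized weights automatically, only the inputs need to be moved to model.device before calling generate.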