FlawedLLM committed
Commit acee492
1 Parent(s): 46b1d37

Update app.py

Files changed (1): app.py +8 -1
app.py CHANGED
@@ -10,7 +10,14 @@ from bitsandbytes.functional import quantize_blockwise
 
 tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
 model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
-model= quantize_blockwise(model)
+def quantize_model(model):
+    # Iterate over the model's modules, not the entire model object
+    for name, module in model.named_modules():
+        if isinstance(module, torch.nn.Linear):  # Quantize only Linear layers
+            module = quantize_blockwise(module)
+
+# Quantize the model (modified)
+quantize_model(model)
 # alpaca_prompt = You MUST copy from above!
 @spaces.GPU(duration=300)
 def chunk_it(input_command, item_list):
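As committed, `quantize_model` still has two problems: `bitsandbytes.functional.quantize_blockwise` operates on tensors rather than `nn.Module` objects, and rebinding the loop variable `module` never modifies the model. The sketch below is a minimal illustration of the apparent intent, not the commit's code: it round-trips each Linear layer's weight through blockwise quantization (quantize, then dequantize back in place) and collects the per-layer quantization state. The round-trip step and the `quant_states` dict are assumptions added here.

import torch
from bitsandbytes.functional import quantize_blockwise, dequantize_blockwise

def quantize_model(model):
    """Round-trip every Linear layer's weights through blockwise quantization."""
    quant_states = {}
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):  # quantize only Linear layers
            orig_dtype = module.weight.dtype
            # quantize_blockwise takes a tensor and returns the quantized
            # payload plus the state (absmax, codebook) needed to reconstruct it
            q_weight, quant_state = quantize_blockwise(module.weight.data.float())
            # write the reconstructed values back onto the parameter so the
            # change actually lands on the model (rebinding `module` would not)
            module.weight.data = dequantize_blockwise(q_weight, quant_state).to(orig_dtype)
            quant_states[name] = quant_state
    return quant_states

quant_states = quantize_model(model)

Note that this round-trip only simulates quantization error and does not reduce memory; for an actual memory saving, the usual route with transformers is to pass quantization_config=BitsAndBytesConfig(load_in_4bit=True) to from_pretrained instead of quantizing after loading.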