devve1 committed on
Commit
3dcb8a9
1 Parent(s): 1346d2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -178,15 +178,15 @@ def load_models_and_documents():
178
  tokenizer = AutoTokenizer.from_pretrained(model_path)
179
 
180
  llm = vllm.LLM(
181
- model_path,
182
  tensor_parallel_size=1,
183
  max_model_len=12288,
184
  trust_remote_code=True,
185
  enforce_eager=True,
186
- quantization='awq',
187
  gpu_memory_utilization=0.9,
188
- dtype='auto'
189
- #load_format='npcache'
190
  )
191
  model = models.VLLM(llm)
192
 
 
178
  tokenizer = AutoTokenizer.from_pretrained(model_path)
179
 
180
  llm = vllm.LLM(
181
+ model_path,
182
  tensor_parallel_size=1,
183
  max_model_len=12288,
184
  trust_remote_code=True,
185
  enforce_eager=True,
186
+ quantization="bitsandbytes",
187
  gpu_memory_utilization=0.9,
188
+ dtype='auto',
189
+ load_format="bitsandbytes"
190
  )
191
  model = models.VLLM(llm)
192