devve1 committed on
Commit
4f2edbe
1 Parent(s): 1e3af87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -3
app.py CHANGED
@@ -183,10 +183,9 @@ def load_models_and_documents():
183
  max_model_len=12288,
184
  trust_remote_code=True,
185
  enforce_eager=True,
186
- quantization="bitsandbytes",
187
  gpu_memory_utilization=0.9,
188
- dtype='auto',
189
- load_format="bitsandbytes"
190
  )
191
  model = models.VLLM(llm)
192
 
 
183
  max_model_len=12288,
184
  trust_remote_code=True,
185
  enforce_eager=True,
186
+ quantization="awq",
187
  gpu_memory_utilization=0.9,
188
+ dtype='auto'
 
189
  )
190
  model = models.VLLM(llm)
191