devve1 committed on
Commit
1f7c7c9
1 Parent(s): 7524866

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -352,14 +352,14 @@ def load_models_and_documents():
352
  st.write('Downloading and Loading Mistral Nemo quantized with GPTQ and using Outlines + vLLM Engine as backend...')
353
 
354
  llm = LLM(
355
- model="neuralmagic/Mistral-Nemo-Instruct-2407-quantized.w4a16",
356
  tensor_parallel_size=1,
357
  enforce_eager=True,
358
  gpu_memory_utilization=1,
359
  max_model_len=11264,
360
  dtype=torch.float16,
361
  max_num_seqs=128,
362
- quantization="compressed-tensors"
363
  )
364
  model = models.VLLM(llm)
365
 
 
352
  st.write('Downloading and Loading Mistral Nemo quantized with GPTQ and using Outlines + vLLM Engine as backend...')
353
 
354
  llm = LLM(
355
+ model="shuyuej/Mistral-Nemo-Instruct-2407-GPTQ",
356
  tensor_parallel_size=1,
357
  enforce_eager=True,
358
  gpu_memory_utilization=1,
359
  max_model_len=11264,
360
  dtype=torch.float16,
361
  max_num_seqs=128,
362
+ quantization="gptq"
363
  )
364
  model = models.VLLM(llm)
365