Spaces:

GameScribes
/

Multipurpose-AI-Agent-Development

Sleeping

devve1 committed 21 days ago

Commit

1f7c7c9

•

1 Parent(s): 7524866

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -352,14 +352,14 @@ def load_models_and_documents():
         st.write('Downloading and Loading Mistral Nemo quantized with GPTQ and using Outlines + vLLM Engine as backend...')
         llm = LLM(
-            model="neuralmagic/Mistral-Nemo-Instruct-2407-quantized.w4a16",
             tensor_parallel_size=1,
             enforce_eager=True,
             gpu_memory_utilization=1,
             max_model_len=11264,
             dtype=torch.float16,
             max_num_seqs=128,
-            quantization="compressed-tensors"
         )
         model = models.VLLM(llm)

         st.write('Downloading and Loading Mistral Nemo quantized with GPTQ and using Outlines + vLLM Engine as backend...')
         llm = LLM(
+            model="shuyuej/Mistral-Nemo-Instruct-2407-GPTQ",
             tensor_parallel_size=1,
             enforce_eager=True,
             gpu_memory_utilization=1,
             max_model_len=11264,
             dtype=torch.float16,
             max_num_seqs=128,
+            quantization="gptq"
         )
         model = models.VLLM(llm)