Update app.py
Browse files
app.py
CHANGED
@@ -352,14 +352,14 @@ def load_models_and_documents():
352 |   st.write('Downloading and Loading Mistral Nemo quantized with GPTQ and using Outlines + vLLM Engine as backend...')
353 |
354 |   llm = LLM(
355 | -     model="
356 |       tensor_parallel_size=1,
357 |       enforce_eager=True,
358 |       gpu_memory_utilization=1,
359 |       max_model_len=11264,
360 |       dtype=torch.float16,
361 |       max_num_seqs=128,
362 | -     quantization="
363 |   )
364 |   model = models.VLLM(llm)
365 |
352 |   st.write('Downloading and Loading Mistral Nemo quantized with GPTQ and using Outlines + vLLM Engine as backend...')
353 |
354 |   llm = LLM(
355 | +     model="shuyuej/Mistral-Nemo-Instruct-2407-GPTQ",
356 |       tensor_parallel_size=1,
357 |       enforce_eager=True,
358 |       gpu_memory_utilization=1,
359 |       max_model_len=11264,
360 |       dtype=torch.float16,
361 |       max_num_seqs=128,
362 | +     quantization="gptq"
363 |   )
364 |   model = models.VLLM(llm)
365 |