Update app.py
Browse files
app.py
CHANGED
@@ -183,10 +183,9 @@ def load_models_and_documents():
|
|
183 |
max_model_len=12288,
|
184 |
trust_remote_code=True,
|
185 |
enforce_eager=True,
|
186 |
-
quantization="
|
187 |
gpu_memory_utilization=0.9,
|
188 |
-
dtype='auto'
|
189 |
-
load_format="bitsandbytes"
|
190 |
)
|
191 |
model = models.VLLM(llm)
|
192 |
|
|
|
183 |
max_model_len=12288,
|
184 |
trust_remote_code=True,
|
185 |
enforce_eager=True,
|
186 |
+
quantization="awq",
|
187 |
gpu_memory_utilization=0.9,
|
188 |
+
dtype='auto'
|
|
|
189 |
)
|
190 |
model = models.VLLM(llm)
|
191 |
|