Spaces: Running on T4
Update app.py
Browse files
app.py
CHANGED
@@ -178,15 +178,15 @@ def load_models_and_documents():
|
|
178 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
179 |
|
180 |
llm = vllm.LLM(
|
181 |
-
model_path,
|
182 |
tensor_parallel_size=1,
|
183 |
max_model_len=12288,
|
184 |
trust_remote_code=True,
|
185 |
enforce_eager=True,
|
186 |
-
quantization=
|
187 |
gpu_memory_utilization=0.9,
|
188 |
-
dtype='auto'
|
189 |
-
|
190 |
)
|
191 |
model = models.VLLM(llm)
|
192 |
|
|
|
178 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
179 |
|
180 |
llm = vllm.LLM(
|
181 |
+
model_path,
|
182 |
tensor_parallel_size=1,
|
183 |
max_model_len=12288,
|
184 |
trust_remote_code=True,
|
185 |
enforce_eager=True,
|
186 |
+
quantization="bitsandbytes",
|
187 |
gpu_memory_utilization=0.9,
|
188 |
+
dtype='auto',
|
189 |
+
load_format="bitsandbytes"
|
190 |
)
|
191 |
model = models.VLLM(llm)
|
192 |
|