Removed redundant `load_in_4bit` argument from the `AutoModelForCausalLM.from_pretrained` call (quantization is already specified via `quantization_config=bnb_config`)
Browse files — handler.py (+0 −1)
handler.py
CHANGED
@@ -28,7 +28,6 @@ class EndpointHandler:

Before:

    28         self.model = AutoModelForCausalLM.from_pretrained(
    29             config.base_model_name_or_path,
    30             return_dict=True,
    31 -           load_in_4bit=True,
    32             device_map={"": 0},  # Map to CUDA device 0
    33             trust_remote_code=True,
    34             quantization_config=bnb_config,

After:

    28         self.model = AutoModelForCausalLM.from_pretrained(
    29             config.base_model_name_or_path,
    30             return_dict=True,
    31             device_map={"": 0},  # Map to CUDA device 0
    32             trust_remote_code=True,
    33             quantization_config=bnb_config,